pax_global_header00006660000000000000000000000064147415777720014537gustar00rootroot0000000000000052 comment=464180ff28e6a3f74f7c754ec01ed8a6a2f978df blis-1.1/000077500000000000000000000000001474157777200123315ustar00rootroot00000000000000blis-1.1/.appveyor.yml000066400000000000000000000042741474157777200150060ustar00rootroot00000000000000skip_branch_with_pr: true environment: matrix: - LIB_TYPE: shared CONFIG: auto CC: gcc THREADING: pthreads CBLAS: no - LIB_TYPE: static CONFIG: auto CC: clang THREADING: no - LIB_TYPE: shared CONFIG: x86_64 CC: clang THREADING: pthreads - LIB_TYPE: static CONFIG: auto CC: clang THREADING: openmp - LIB_TYPE: static CONFIG: auto CC: clang THREADING: openmp SANDBOX: yes install: - set "PATH=C:\msys64\mingw64\bin;C:\msys64\bin;%PATH%" - if [%CC%]==[clang] set "PATH=C:\Program Files\LLVM\bin;%PATH%" - if [%CC%]==[clang] set "AR=llvm-ar" - if [%CC%]==[clang] set "AS=llvm-as" - if [%CC%]==[clang] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 build_script: - if [%LIB_TYPE%]==[shared] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% --enable-shared --disable-static" - if [%LIB_TYPE%]==[static] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% --disable-shared --enable-static" - if not [%CBLAS%]==[no] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% --enable-cblas" - if [%SANDBOX%]==[yes] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% -s gemmlike" - set RANLIB=echo - set LIBPTHREAD= - set "PATH=%PATH%;C:\blis\lib" - set "CFLAGS=-Wno-macro-redefined" - bash -lc "cd /c/projects/blis && ./configure %CONFIGURE_OPTS% --enable-threading=%THREADING% --enable-arg-max-hack --prefix=/c/blis %CONFIG%" - bash -lc "cd /c/projects/blis && mingw32-make -j4 V=1" - bash -lc "cd /c/projects/blis && mingw32-make install" - 7z a C:\blis.zip C:\blis - ps: Push-AppveyorArtifact C:\blis.zip test_script: # "make checkblas" does not work with shared linking Windows due to inability to override xerbla_ - if [%LIB_TYPE%]==[shared] set "TEST_TARGET=checkblis-fast" - if [%LIB_TYPE%]==[static] set "TEST_TARGET=check" - bash -lc "cd /c/projects/blis && mingw32-make %TEST_TARGET% -j4 V=1" # Enable this to be able to login to the build worker. You can use the # `remmina` program in Ubuntu, use the login information that the line below # prints into the log. #on_finish: #- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) blis-1.1/.dir-locals.el000066400000000000000000000016551474157777200147710ustar00rootroot00000000000000;; Emacs formatting for the BLIS layout requirements. ( ;; Recognize *.mk files as Makefile fragments (auto-mode-alist . (("\\.mk\\'" . makefile-mode)) ) ;; Makefiles require tabs and are almost always width 8 (makefile-mode . ( (indent-tabs-mode . t) (tab-width . 8) ) ) ;; C code formatting roughly according to docs/CodingConventions.md (c-mode . ( (c-file-style . "bsd") (c-basic-offset . 4) (comment-start . "// ") (comment-end . "") (parens-require-spaces . nil) ) ) ;; Default formatting for all source files not overriden above (prog-mode . ( (indent-tabs-mode . nil) (tab-width . 4) (require-final-newline . t) (eval add-hook `before-save-hook `delete-trailing-whitespace) ) ) ) blis-1.1/.gitignore000066400000000000000000000014531474157777200143240ustar00rootroot00000000000000# -- generic files to ignore -- # emacs backup files *~ # vim backup files *.swp # NFS file .nfs* # -- compiler-related -- # object files # NOTE: This will result in git also exluding the top-level obj directory # since its only contents are .o files. *.o # static library archives # NOTE: This will result in git also exluding the top-level lib directory # since its only contents are .a files. *.a *.so *.so.* # test executables *.x *.pexe *.nexe *.js # link map files *.map # -- build system files -- config.mk bli_config.h bli_addon.h # -- monolithic headers -- include/*/*.h # -- makefile fragments -- .fragment.mk # -- misc. -- # BLIS testsuite output file output.testsuite output.testsuite.* # BLAS test output files out.* # GTAGS database GPATH GRTAGS GTAGS # Mac DS.store files .DS_Store blis-1.1/.travis.yml000066400000000000000000000134701474157777200144470ustar00rootroot00000000000000language: c sudo: required dist: focal branches: only: - master - dev - amd matrix: include: # full testsuite (all tests + mixed datatype (gemm_nn only) + salt + SDE + OOT) - os: linux compiler: gcc env: OOT=1 TEST=ALL SDE=1 THR="none" CONF="x86_64" \ PACKAGES="gcc-9 binutils" # openmp build - os: linux compiler: gcc env: OOT=0 TEST=FAST SDE=0 THR="openmp" CONF="auto" \ PACKAGES="gcc-9 binutils" # pthreads build - os: linux compiler: gcc env: OOT=0 TEST=FAST SDE=0 THR="pthreads" CONF="auto" \ PACKAGES="gcc-9 binutils" # clang build - os: linux compiler: clang env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="auto" # There seems to be some difficulty installing two Clang toolchains of # different versions. # Use the TravisCI default. # PACKAGES="clang-8 binutils" # macOS with system compiler (clang) - os: osx compiler: clang env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="auto" # cortexa15 build and fast testsuite (qemu) - os: linux compiler: arm-linux-gnueabihf-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="cortexa15" \ CC=arm-linux-gnueabihf-gcc CXX=arm-linux-gnueabihf-g++ \ PACKAGES="gcc-arm-linux-gnueabihf g++-arm-linux-gnueabihf libc6-dev-armhf-cross qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-arm -cpu cortex-a15 -L /usr/arm-linux-gnueabihf/" # cortexa57 build and fast testsuite (qemu) - os: linux compiler: aarch64-linux-gnu-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="cortexa57" \ CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ \ PACKAGES="gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/" # Apple M1 (firestorm) build and fast testsuite (qemu) - os: linux compiler: aarch64-linux-gnu-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="firestorm" \ CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ \ PACKAGES="gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/" # armsve build and fast testsuite (qemu) - os: linux compiler: aarch64-linux-gnu-gcc-10 env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="armsve" \ CC=aarch64-linux-gnu-gcc-10 CXX=aarch64-linux-gnu-g++-10 \ PACKAGES="gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-aarch64 -cpu max,sve=true,sve512=true -L /usr/aarch64-linux-gnu/" # arm64 build and fast testsuite (qemu) # NOTE: This entry omits the -cpu flag so that while both NEON and SVE kernels # are compiled, only NEON kernels will be tested. (h/t to RuQing Xu) - os: linux compiler: aarch64-linux-gnu-gcc-10 env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="arm64" \ CC=aarch64-linux-gnu-gcc-10 CXX=aarch64-linux-gnu-g++-10 \ PACKAGES="gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/" # The RISC-V targets require the qemu version available in jammy or newer. # When CI is upgraded, the packages should be activated and do_script.sh # cleaned up. # PACKAGES="qemu-user qemu-user-binfmt" - os: linux compiler: riscv64-unknown-linux-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" BLD="--disable-shared" CONF="rv64iv" \ CC=riscv64-unknown-linux-gnu-gcc \ LDFLAGS=-static - os: linux compiler: riscv32-unknown-linux-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" BLD="--disable-shared" CONF="rv32iv" \ CC=riscv32-unknown-linux-gnu-gcc \ LDFLAGS=-static - os: linux compiler: clang env: OOT=0 TEST=FAST SDE=0 THR="none" BLD="--disable-shared" CONF="sifive_x280" \ CC=clang \ LDFLAGS=-static install: - if [ "$CC" = "gcc" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="gcc-9"; fi - if [ -n "$PACKAGES" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get install -y $PACKAGES; fi script: - export DIST_PATH=. - pwd - if [ $OOT -eq 1 ]; then export DIST_PATH=`pwd`; mkdir ../oot; cd ../oot; chmod -R a-w $DIST_PATH; fi - pwd - if [ "$CONF" = "rv64iv" ]; then $DIST_PATH/travis/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-gcc; export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv64-unknown-linux-gnu-g++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; fi - if [ "$CONF" = "rv32iv" ]; then $DIST_PATH/travis/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-gcc; export CXX=$DIST_PATH/../toolchain/riscv/bin/riscv32-unknown-linux-gnu-g++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv32 -cpu rv32,vext_spec=v1.0,v=true,vlen=128 -B 0x100000"; fi - if [ "$CONF" = "sifive_x280" ]; then $DIST_PATH/travis/do_riscv.sh "$CONF"; export CC=$DIST_PATH/../toolchain/riscv/bin/clang; export CXX=$DIST_PATH/../toolchain/riscv/bin/clang++; export TESTSUITE_WRAPPER="$DIST_PATH/../toolchain/qemu-riscv64 -cpu rv64,vext_spec=v1.0,v=true,vlen=512 -B 0x100000"; fi - $DIST_PATH/configure -p `pwd`/../install -t $THR $BLD CC=$CC $CONF - pwd - ls -l - $CC --version - $CC -v - make -j 2 - make install - if [ "$BLD" = "" ]; then $DIST_PATH/travis/cxx/cxx-test.sh $DIST_PATH $(ls -1 include); fi # Qemu SVE is failing sgemmt in some cases. Skip as this issue is not observed # on real chip (A64fx). - if [ "$CONF" = "armsve" ]; then sed -i 's/.*\.*/0/' $DIST_PATH/testsuite/input.operations.fast; fi - if [ "$TEST" != "0" ]; then travis_wait 30 $DIST_PATH/travis/do_testsuite.sh; fi - if [ "$SDE" = "1" ]; then travis_wait 30 $DIST_PATH/travis/do_sde.sh; fi blis-1.1/CHANGELOG000066400000000000000000046721221474157777200135610ustar00rootroot00000000000000commit c00b9c748dbf271e5719dedd1184383efe662b44 Author: Devin Matthews Date: Thu Dec 19 11:47:44 2024 -0600 Version file update (1.1) commit 623f6b9524204dbf6965f34cda52b23d6a51da96 Author: Devin Matthews Date: Thu Dec 19 11:41:26 2024 -0600 ReleaseNotes.md update. commit a1709671a553bcfab2a16c6ed9f650bc5f1a7e84 Author: Devin Matthews Date: Thu Dec 19 11:41:12 2024 -0600 CREDITS file update. commit d6d2c88adebc99bd4a8720be2b2ab20a85794712 Author: Field G. Van Zee Date: Thu Aug 8 13:34:37 2024 -0500 Fixed out-of-bounds read bug in sup haswell ukr. (#824) Details: - Fixed a bug in the bli_sgemmsup_rd_haswell_asm_1x16n() millikernel. The kernel was erroneously performing an out-of-bounds read whenever the singleton edge case loop executed (that is, whenever the k dimension of the millikernel problem was not a multiple of 8). This OOB error was the result of a copy-paste bug; when developing the s1x16n function, I started from a copy of the s2x16n function, but then failed to delete the instruction that reads the second element of A in the code that handles the PR loop's edge case. Thanks to @j-bm for reporting this bug in Issue #821 and helping narrow down the cause to the rax register. - CREDITS file update. commit e6f7d80c700a253e7c52a74425eb3bef00bcb3fb Author: Field G. Van Zee Date: Wed Jun 26 16:18:21 2024 -0500 Fix a bug in the piledriver microkernels. (#814) Details: - At some point, the piledriver (and bulldozer and excavator) microkernel tests via SDE had been removed from Travis CI testing. This PR re-enables them. - A bug in the piledriver complex gemm microkernels has also been fixed. The beta*C product was not being correctly added to the A*B product before writing back out to memory. - Fixes #811. - (cherry picked from commit 31ecf820b9eb3368ad907ae6b192bf7397ebc92c) commit dce9d2a1f903edb6b9055d269cf28be55ce86527 Author: Field G. Van Zee Date: Wed Jun 26 16:14:34 2024 -0500 Add ScaLAPACK compatibility mode. (#813) Details: - Add configure options '--enable-scalapack-compat' and '--disabled-scalapack-compat' (default disabled). - Add a macro BLIS_{ENABLE,DISABLE}_SCALAPACK_COMPAT to bli_config.h. - This option and macro control any changes to the API necessary to maintain compatibility with ScaLAPACK. Currently, this only means disabling the complex versions of syr, syr2, and symv. In the future, other changes could be controlled by the same flag. - Complex syr2 wasn't enabled at the same time that complex syr and symv were. This is now corrected. - (cherry picked from commit 415893066e966159799d96166cadcf9bb5535b1c) Fixed typo in 4158930; variable renames. (#815) Details: - Fixed a typo in the "./configure --help" output for the ScaLAPACK compatibility option implemented in 4158930. - Trivial variable renames. - (cherry picked from commit 8820f8f91efd32e38e2995e73323656ef767bbd8) commit 1a6772feb1749faa2b42d30ae720738087ef6967 Author: Field G. Van Zee Date: Tue Jun 4 13:47:05 2024 -0500 Fix SyntaxWarning messages from python 3.12 (#809) Details: - When using regexes in Python, certain characters need backslash escaping, e.g.: regex = re.compile( '^[\s]*#include (["<])([\w\.\-/]*)([">])' ) However, technically escape sequences like `\s` are not valid and should actually be double-escaped: `\\s`. Python 3.12 now warns about such escape sequences, and in a later version these warning will be promoted to errors. See also: https://docs.python.org/dev/whatsnew/3.12.html#other-language-changes The fix here is to use Python's "raw strings" to avoid double-escaping. This issue can be checked for all files in the current directory with the command: python -m compileall -d . -f -q . Thanks to @AngryLoki for the fix. - (cherry picked from commit 729c57c15aa50030145ff702626c31839ded3502) Update CREDITS - (cherry picked from commit 5cbec6503de335b3b63fa5d4f388fddd3aff2b61) commit 49af2243c2a60ed8fedb44f237f4ec100465cd89 Author: Field G. Van Zee Date: Mon May 6 14:07:33 2024 -0500 ReleaseNotes.md update. Details: - (cherry picked from commit 06dddf1e51ccff70d77ee8cb731c3217e70eb730) CHANGELOG update (1.0) Details: - (cherry picked from commit a876918c8c79a1c3d3d95de1f283350b7249b8ae) Version file update (1.0) Details: - (cherry picked from commit c2af113c7ba6d0dcc128ba36ec6e140d89180cf3) commit 7d486312c8c04afb81e2e424daf25aa65f758069 Author: Field G. Van Zee Date: Tue Apr 30 17:13:15 2024 -0500 Use "-i auto" by default in test/3 drivers. Details: - Request default induced method behavior of BLIS via "-i auto" when running the standalone performance drivers in test/3 via the runme.sh script present in that directory. (Previously, the runme.sh script would use "-i native" by default.) This change was originally intended for fd1a7e3. - (cherry picked from commit cad51491e8a0b306015a5a02881dc2a9b60dd8d9) commit 5eff5f931bc97decaf318dc8efa1cbcf33a09eb5 Author: Field G. Van Zee Date: Tue Apr 30 17:12:49 2024 -0500 Allow test/3 drivers to use default ind_t method. (#804) Details: - Previously, the standalone performance drivers in test/3 were written under the assumption that the user would want to explicitly test either native execution *or* 1m. But because the accompanying runme.sh script defaults to passing "native" in for the -i command line option (which explicitly sets the induced method type), running the script without modification causes the test drivers to use slow reference microkernels on systems where native complex-domain microkernels are not registered -- which will yield poor performance for complex-domain level-3 operations. Furthermore, even if a user was aware of this, the test drivers did not support any single value for the -i option that would test BLIS using the library's default behavior -- that is, using 1m on systems where it is needed and native execution on systems that have native microkernels implemented and registered. - This commit addresses the aforementioned issue by supporting a new value for the -i option: "auto". The "auto" value causes the driver to avoid explicitly setting the induced method altogether, leaving BLIS's default behavior in place. This "auto" option is also now the default setting within the runme.sh script. Thanks to Leick Robinson for finding and reporting this issue. - Also added support for "nat" as a shorthand for "native", which the help text already (erroneously) claimed was supported. - (cherry picked from commit fd1a7e3ca9547718aa61c806848099705216182b) commit 968c9be404763b48e72f218598c7edd2bd571780 Author: Field G. Van Zee Date: Tue Dec 12 13:34:31 2023 -0600 Include bli_config.h before bli_system.h in cblas.h. (#789) Details: - Previously, in cblas.h, bli_config.h was being #included *after* bli_system.h, which meant that the BLIS_ENABLE_SYSTEM macro was never defined in time for proper OS detection. This bug only affected cblas.h -- blis.h had been correctly #including bli_config.h before bli_system.h since fb93d24. Thanks to Edward Smyth for reporting this bug and suggesting the fix. - (cherry picked from commit a72e4569f2a03cc3578c019bf7ce25491a44137d) commit 4e68494012722323212139647cdfb944553a4842 Author: Field G. Van Zee Date: Tue Dec 12 13:34:06 2023 -0600 Fixed random segfault in test/3 drivers. (#788) Details: - Fixed a segfault in the non-gemm test drivers in test/3 that was the result of sometimes leaving either .n_str or .k_str fields of the params_t struct uninitialized, depending on the operation in question. For example, in test_hemm.c, init_def_params() would only initialize the .m_str and .n_str fields, but not the .k_str field. Even though hemm doesn't use a 'k' dimension, the proc_params() function (called via parse_cl_params()) universally attempts to convert all three into integers via sscanf(), which was understandably failing when one of those strings was a NULL pointer. I'm not sure how this code ever worked to begin with. Special thanks to Leick Robinson for finding and reporting this bug. - (cherry picked from commit 1236ddab455ef3a6293ab394ff06b3a19c2913d9) commit 8109d18972c4211909ffa978f7a37db669ddc8b0 Author: Field G. Van Zee Date: Tue Dec 12 13:32:11 2023 -0600 Install helper headers to INCDIR prefix. (#787) Details: - Install one-line headers to INCDIR whose entire purpose is to #include the actual headers within the local 'blis' header directory so that applications can #include "blis.h" instead of #include (and/or "cblas.h" instead of if CBLAS is enabled) when headers are installed to global paths. (Note that INCDIR is the installation prefix for headers as specified by '--includedir=INCDIR', which defaults to 'PREFIX/include' if not specified.) Not sure how this problem went unreported for so long, since presumably any user trying to #include "blis.h" from a global installation would have encountered a compiler error. - The one-line blis.h and cblas.h headers now reside in the 'build' directory, ready to install as is. - Thanks to to Jed Brown for reporting this via Issue #786, and for Devin Matthews and Mo Zhou for their engagement. - Harmonized the rule in the top-level Makefile for installing blis.pc into SHAREDIR/pkgconfig with conventions for others vis-a-vis verbosity/non-verbosity. - (cherry picked from commit 141a6c9a8e7557d9c7d28aecedec9dc5377dba13) commit f7ce54a252028483e4c6af619015eb22063d5541 Author: Field G. Van Zee Date: Fri Nov 3 15:52:57 2023 -0500 CREDITS file update. commit 05388ddb66f8bf2d62009b162d64bf2d99226b83 Author: Aaron Hutchinson <113382047+Aaron-Hutchinson@users.noreply.github.com> Date: Fri Nov 3 13:30:31 2023 -0700 Added 'sifive_x280' subconfig, kernel set. (#737) Details: - Added a new 'sifive_x280' subconfiguration for SiFive's x280 RISC-V instruction set architecture. The subconfig registers kernels from a correspondingly new kernel set, also named 'sifive_x280'. - Added the aforementioned kernel set, which includes intrinsics- and assembly-based implementations of most level-1v kernels along with level-1f kernels axpy2v dotaxpyv, packm kernels, and level-3 gemm, gemmtrsm_l, and gemmtrsm_u microkernels (plus supporting files). - Registered the 'sifive_x280' subconfig as belonging to a singleton family by the same name. - Added an entry to '.travis.yml' to test the new subconfig via qemu. - Updates to 'travis/do_riscv.sh' script to support the 'sifive_x280' subconfig and to reflect updated tarball names. - Special thanks to Lee Killough, Devin Matthews, and Angelika Schwarz for their engagement on this commit. commit 7a87e57b69d697a9b06231a5c0423c00fa375dc1 Author: Srinivas Yadav <43375352+srinivasyadav18@users.noreply.github.com> Date: Sat Oct 14 02:05:41 2023 -0500 Fixed HPX barrier synchronization (#783) Details: - Fixed hpx barrier synchronization. HPX was hanging on larger cores because blis was using non-hpx synchronization primitives. But when using hpx-runtime only hpx-synchronization primitives should be used. Hence, a C style wrapper hpx_barrier_t is introduced to perform hpx barrier operations. - Replaced hpx::for_loop with hpx::futures. Using hpx::for_loop with hpx::barrier on n_threads greater than actual hardware thread count causes synchronization issues making hpx hanging. This can be avoided by using hpx::futures, which are relatively very lightweight, robust and scalable. commit 8fff1e31da1c87e46cacec112b0ac280ab47cd8b Author: Field G. Van Zee Date: Thu Oct 12 15:51:41 2023 -0500 Fixed bug in sup threshold registration. (#782) Details: - Fixed a bug that resulted in BLIS non-deterministically calling the gemmsup handler, irrespective of the thresholds that are registered via bli_cntx_set_blkszs(). - Deep dive: In bli_cntx_init_ref.c, the default values for the gemmsup thresholds (BLIS_[MNK]T blocksizes) wre being set to zero so that no operation ever matched the criteria for gemmsup (unless specific sup thresholds are registered). HOWEVER, these thresholds are set via bli_cntx_set_blkszs() which calls bli_blksz_copy_if_pos(), which was only coping the thresholds into the gks' cntx_t if the values were strictly positive. Thus, the zero values passed into bli_cntx_set_blkszs() were being ignored and those threshold slots within the gks were left uninitialized. The upshot of this is that the reference gemmsup handler was being called for gemm problems essentially at random (and as it turns out, very rarely the reference gemmsup implementation would encounter a divide-by-zero error). - The problem was fixed by changing bli_blksz_copy_if_pos() so that it copies values that are non-negative (values >= 0 instead of > 0). The function was also renamed to bli_blksz_copy_if_nonneg() - Also needed to standardize use of -1 as the sole value to embed into blksz_t structs as a signal to bli_cntx_set_blkszs() to *not* register a value for that slot (and instead let whatever existing values remain). This required updates to the bli_cntx_init_*() functions for bgq, cortexa9, knc, penryn, power7, and template subconfigs, as some of these codes were using 0 instead of -1. - Fixes #781. Thanks to Devin Matthews for identifying, diagnosing, and proposing a fix for this issue. commit 1e264a42474b535431768ef925bbd518412d392e Author: Abhishek Bagusetty <59661409+abagusetty@users.noreply.github.com> Date: Mon Oct 2 18:29:46 2023 -0500 Update zen3 subconfig to support NVHPC compilers. (#779) Details: - Parse $(CC_VENDOR) values of "nvc" in 'zen3' make_defs.mk file. - Minor refactor to accommodate above edit. - CREDITS file update. commit c2099ed2519dcac8ee421faf999b36e1c2260be7 Author: Field G. Van Zee Date: Mon Oct 2 14:56:48 2023 -0500 Fixed brokenness when sba is disabled. (#777) Details: - Previously, disabling the sba via --disable-sba-pools resulted in a segfault due to a sanity-check-triggering abort(). The problem was that the sba, as currently used in the l3 thread decorators, did not yet (fully) support pools being disabled. The solution entailed creating wrapper function, bli_sba_array_elem(), which either calls bli_apool_array_elem() (when sba pools are enabled at configure time) or returns a NULL sba_pool pointer (when sba pools are disabled), and calling bli_sba_array_elem() in place of bli_apool_array_elem(). Note that the NULL pointer returned by bli_sba_array_elem() when the sba pools are disabled does no harm since in that situation the pointer goes unreferenced when acquiring and releasing small blocks. Thanks to John Mather for reporting this bug. - Guarded the bodies of bli_sba_init() and bli_sba_finalize() with #ifdef BLIS_ENABLE_SBA_POOLS. I don't think this was actually necessary to fix the aforementioned bug, but it seems like good practice. - Moved the code in bli_l3_thrinfo_create() that checked that the array* pointer is non-NULL before calling bli_sba_array_elem() (previously bli_apool_array_elem()) into the definition of bli_sba_array_elem(). - Renamed various instances of 'pool' variables and function parameters to 'sba_pool' to emphasize what kind of pool it represents. - Whitespace changes. commit 37ca4fd168525a71937d16aaf6a13c0de5b4daef Author: Field G. Van Zee Date: Thu Sep 28 16:37:57 2023 -0500 Implemented [cz]symv_(), [cz]syr_(), [cz]rot_(). (#778) Details: - Expanded existing BLAS compatibility APIs to provide interfaces to [cz]symv_(), [cz]syr_(). This was easy since those operations were already implemented natively in BLIS; the APIs were previously omitted only because they were not formally part of the BLAS. - Implemented [cz]rot_() by feeding code from LAPACK 3.11 through f2c. - Thanks to James Foster for pointing out that LAPACK contains these additional symbols, which prompted these additions, as well as for testing the [cz]rot_() functions from Julia's test infrastructure. - CREDITS file update. commit 6f412204004666abac266409a203cb635efbabf3 Author: Field G. Van Zee Date: Tue Sep 26 18:00:54 2023 -0500 Added 'altra', 'altramax' subconfigs. (#775) Details: - Forward-ported 'altra' and 'altramax' subconfigurations from the older 'stable' branch lineage [1]. These subconfigs primarily target the Ampere Altra and AltraMax (ARM) processors. They also contain "QuickStart" directories with information and scripts to help use BLIS on these microarchitectures. Thanks to Jeff Diamond and Leick Robinson for developing these subconfigs and resources. - Updated kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c according to changes in the 'stable' lineage, mostly related to re-enabling of assembly code branches that target general stride IO. [1] Note that the 'stable' branch is being used to make sure that more recent commits do not introduce unreasonable performance regressions. As such, the name should be interpreted as shorthand for "performance stable," not "API stable." commit a4a63295b96ed5b32f4df6477d24db07bf431202 Author: Srinivas Yadav <43375352+srinivasyadav18@users.noreply.github.com> Date: Tue Sep 26 17:58:38 2023 -0500 Fixes to HPC runtime code path. (#773) Details: - Fixed hpx::for_each invocation and replace with hpx::for_loop. The HPX runtime was initialized using hpx::start, but the hpx::for_each function was being called on a non-hpx runtime (i.e standard BLIS runtime - single main thread). To run hpx::for_each on HPX runtime correctly, the code now uses hpx::run_as_hpx_thread(func, args...). - Replaced hpx::for_each with hpx::for_loop, which eliminates use of hpx::util::counting_iterator. - Employ hpx::execution::chunk_size(1) to make sure that a thread resides on a particular core. - Replaced hpx::apply() with updated version hpx::post(). - Initialize tdata->id = 0 in libblis.c to 0, as it is the main thread and is needed for writing results to output file. - By default, if not specified, the HPX runtime uses all N threads/cores available in the system. But, if we want to only specify n_threads out N threads, we use hpx::execution::experimental::num_cores(n_threads). commit c6546c1131b1ddd45ef13f9f2b620ce2e955dbf8 Author: John Mather <54645798+jmather-sesi@users.noreply.github.com> Date: Wed Sep 20 13:41:07 2023 -0400 Fixed broken link in Multithreading.md. (#774) Details: - Replaced 404'd link in docs/Multithreading.md with an archive from The Wayback Machine. - CREDITS file update. commit 6dcf7666eff14348e82fbc2750be4b199321e1b9 Author: Field G. Van Zee Date: Sun Aug 27 14:18:57 2023 -0500 Revamped bli_init() to use TLS where feasible. (#767) Details: - Revamped bli_init_apis() and bli_finalize_apis() to use separate bli_pthread_switch_t objects for each of the five sub-API init functions, with the objects for the 'ind' and 'rntm' sub-APIs being declared with BLIS_THREAD_LOCAL. This allows some APIs to be treated as thread-local and the rest as thread-shared. Thanks to Edward Smyth for requesting application thread-specific rntm_t structs, which inspired these change. - Combined bli_thread_init_from_env() and bli_pack_init_from_env() into a new function, bli_rntm_init_rntm_from_env(), and placed the combined code in bli_rntm.c inside of a new bli_rntm_init() function. Then removed the (now empty) bli_pack_init() and _finalize() function defs. - Deprecated bli_rntm_init() for the purposes of initializing a rntm_t (temporarily preserving it as bli_rntm_clear() in a cpp-undefined code block) so that the function name could be used for the aforementioned bli_rntm_init() function. - Updated libblis_test_pobj_create() in test_libblis.c to use a static rntm_t initializer instead of the deprecated bli_rntm_init() function-based option. - Minor updates to docs/Multithreading.md, including removal of bli_rntm_init() in the example of how to initialize rntm_t structs. - Changed the return value of bli_gks_init(), bli_ind_init(), bli_memsys_init(), bli_thread_init(), and bli_rntm_init() (and their finalize() counterparts) from 'void' to 'int' so that those functions match the function type expected by bli_pthread_switch_on()/_off(). Those init/finalize functions now return 0 to indicate success, which is needed so that the switch actually changes state from off to on and vice versa. - Defined bli_thread_reset(), which copies the contents of the global_rntm_at_init() struct into the global_rntm struct (for the current application thread). - Guard calls to bli_pthread_mutex_lock()/_unlock() in - bli_pack_set_pack_a() and _pack_b() - bli_rntm_init_from_global() - bli_thread_set_ways() - bli_thread_set_num_threads() - bli_thread_set_thread_impl() - bli_thread_reset() - bli_l3_ind_oper_set_enable() with #ifdef BLIS_DISABLE_TLS (since TLS precludes the possibility of race conditions). - In frame/base/bli_rntm.c, declare global_rntm, global_rntm_at_init, and global_rntm_mutex as BLIS_THREAD_LOCAL so that separate application threads can change the number of ways of BLIS parallelism independently from one another. - Access global_rntm only via a new private (not exported) function, bli_global_rntm(). Defined a similar function for a rntm_t new to this commit, global_rntm_at_init, which preserves the state of the global rntm at initialization-time. - In frame/3/bli_l3_ind.c, added a guard to the declaration of the static variable oper_st_mutex with #ifdef BLIS_DISABLE_TLS so that the mutex is omitted altogether when TLS is enabled (which prevents the compiler from warning about an unused variable). - Removed redundant code from bli_thread.c: #ifdef BLIS_ENABLE_HPX #include "bli_thread_hpx.h" #endif since this code is already present in bli_thread.h. - Thanks to Minh Quan Ho for his review of and feedback on this commit. - Comment updates. commit fa6a9b24ae2ddbd5f30f657d46004843581c768c Author: Field G. Van Zee Date: Sat Aug 19 12:44:34 2023 -0500 Fixed error when using common.mk from testsuite. (#768) Details: - Commit 2db31e0 (#755) inserted logic into common.mk that attempts to preprocess build/detect/android/bionic.h to determine whether the __BIONIC__ macro is defined (in which case -lrt should not be included in LDFLAGS). However, the path to bionic.h was encoded without regard to DIST_PATH, and so utilizing common.mk anywhere that isn't the top- level directory (such as in the testsuite directory) resulted in a compiler error: gcc: error: build/detect/android/bionic.h: No such file or directory gcc: fatal error: no input files compilation terminated. This commit adds a $(DIST_PATH) prefix to the path to bionic.h so that it can be located from other applications' Makefiles that use BLIS's makefile fragments. commit 634e532c8dcce7383d96ba33276df65c656b2198 Author: Field G. Van Zee Date: Wed Aug 9 21:54:49 2023 -0500 Set thrcomm timpl_t id inside init functions. (#766) Details: - Previously, the timpl_t id being used when a thrcomm_t is being initialized was set within the bli_thrcomm_init() dispatch function after the timpl_t-specific bli_thrcomm_init_*() function returned. But it just occurred to me that each bli_thrcomm_init_*() function already intrinsically knows its own timpl_t value. This commit shifts the setting of the thrcomm_t.ti field into the corresponding bli_thrcomm_init_*() function for each timpl_t type (e.g. single, openmp, pthreads, hpx). - Removed long-deprecated code dating back nearly 10 years. - Whitespace changes - Comment updates. commit 3cf17b4a91232709bc6a205b0e4d7ecc96579aa9 Author: Field G. Van Zee Date: Mon Aug 7 13:46:20 2023 -0500 Small fixes/improvements to docs/Multithreading.md. (#764) Details: - Added reminders that #include "blis.h" must be added to source files in order to access BLIS API function prototypes. Thanks to Barry Smith for suggesting this improvement. - Fixed pre-existing typos. - CREDITS file update. commit dbc79812c390f812c7bf030bfcf87e947a1443c4 Author: Field G. Van Zee Date: Fri Jul 28 18:16:38 2023 -0500 CREDITS file update. Details: - Thanks to Igor Zhuravlov for PR #753 (commit 915daaa). commit 915daaa43cd189c86d93d72cd249714f126e9425 Author: Igor Zhuravlov Date: Thu Jul 27 20:33:59 2023 +0000 Fix typos in docs + example code comments. (#753) Details: - Fixed various typos in API documentation in docs/BLIS*API.md and comments in the source code examples within examples/?api/*.c. commit 2db31e057e7e9c97fc60021b5ae72a01a48d7588 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Thu Jul 27 15:27:21 2023 -0500 Exclude -lrt on Android with Bionic libraries. (#755) Details: - Added build/detect/android/bionic.h header to test whether the __BIONIC__ cpp macro is defined. - In common.mk, only add -lrt to LDFLAGS when Bionic is not present. - CREDITS file update. commit 22ad8c1b752364784f320168b31995945ad84a59 Author: ct-clmsn Date: Thu Jul 27 16:23:29 2023 -0400 Small fixes to support hpx in the testsuite (#759) Details: - Minor changes to test_libblis.c to support hpx. commit c91b41d022e33da82b3b06c82be047a29873d9b6 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Wed Jul 26 14:37:08 2023 -0500 Auto-detect the RISC-V ABI of the compiler and use -mabi= during RISC-V Builds (#750) Details: - Generate a build error if there is a 32/64-bit mismatch between the RISC-V ABI or architecture and the BLIS configuration selected. - Handle Q, Zicsr, ZiFencei, Zba, Zbb, Zbc, Zbs and Zfh extensions in the RISC-V architecture auto-detection. ZiFencei and Zicsr is not detectable with built-in RISC-V macros right now. - ZiFencei is not important for BLIS because doesn't it have Just-In-Time compilation or self-modifying code, and Zicsr is implied by the floating-point extensions, which are required for good performance in BLIS. - Move RISC-V autodetect header files to build/detect/riscv/. commit a0b04e3c007f1207e5678bf20c07752906742fb7 Author: Field G. Van Zee Date: Mon Jun 26 17:59:21 2023 -0500 Rewrote regen-symbols.sh (gen-libblis-symbols.sh). (#751) Details: - Wrote an alternative to regen-symbols.sh, gen-libblis-symbols.sh, that generates a list of exported symbols from the monolithic blis.h file rather than peeking inside of the shared object via nm. (This new script lives in the 'build' directory and the older script has been retired to build/old.) Special thanks to Devin Matthews for authoring gen-libblis-symbols.sh. - Added a 'symbols' target to the top-level Makefile which will refresh build/libblis-symbols.def, with supporting changes to common.mk. - Updates to build/libblis-symbols.def using the new symbol-generating script. commit 6b894c30b9bb2c2518848d74e4c8d96844f77f24 Author: Field G. Van Zee Date: Mon Jun 12 17:22:44 2023 -0500 Rewrote/fixed broken tree barrier implementation. Details: - Rewrote the defintion of bli_thrcomm_tree_barrier() so that it (a) actually worked again, and (b) used atomics instead of a basic C99 spin loop. (Note that the conventional barrier implementation is still enabled by default; the tree barrier must be toggled on manually within the configuration.) - Added an early return to the definition of bli_thrcomm_barrier() in the cases where comm == NULL or comm->n_threads == 1. - Reordered thread-related and thread-dependent header #include directives in blis.h so that the BLIS_TREE_BARRIER and BLIS_TREE_BARRIER_ARITY macros, which would be defined in the target configuration's in the bli_family_*.h file, would be #included prior to the inclusion of the thrcomm_t header that uses them. - Changed the type of barrier_t.count from 'int' to 'dim_t'. - Changed the type of barrier_t.signal from 'volatile int' to 'gint_t'. - Special thanks to Leick Robinson for contributing these changes. - Whitespace changes. commit d639554894b6252a86bd3164921bce6fbb9e3b5e Author: Field G. Van Zee Date: Wed Jun 7 16:11:14 2023 -0500 Pad thrcomm_t fields to avoid false sharing. Details: - Inserted a cache line of padding between various fields of the thrcomm_t and, in the case of the (presently defunct) tree barrier, fields of the barrier_t. This additional padding ensures that these fields, which both serve different purposes when performing a thread barrier, are only accessed when needed (and not just due to their spatial locality with their cache line neighbors). - Added a new cpp macro constant, BLIS_CACHE_LINE_SIZE, to bli_config_macro_defs. This new constant defines the size of a cache line (in bytes) and defaults to 64. - Special thanks to Leick Robinson for discovering this false sharing issue and developing/submitting the patch. commit 89b7863fc9a88903917deedc6a5ad9fd17f83713 Author: Devin Matthews Date: Mon May 8 16:51:18 2023 -0500 Fix 1m enablement for herk/her2k/syrk/syr2k. (#743) Details: - Ever since 28b0982, herk, her2k, syrk, and syr2k have been implemented in terms of the gemmt expert API. And since the decision of which induced method to use (1m or native) is made *below* the level of the expert API, executing any of {herk,her2k,syrk,syr2k} results in BLIS checking the enablement status for gemmt. - This commit applies a band-aid of sorts to this issue by modifying bli_l3_ind_oper_get_enable() and bli_l3_ind_oper_set_enable() so that any attempts to query or modify the internal enablement status for herk, her2k, syrk, or syr2k instead does so for gemmt. - This solution isn't perfect since, in theory, the user could enable 1m for, say, herk but then disable it for syrk, and then be confused when herk runs via native execution. But we don't anticipate that users modify 1m enablement at the operation level, and so in practice this solution is likely fine for now. commit 138de3b3e88c5bf7d8718c45c88811771cf42db8 Author: Ajay Panyala Date: Sun May 7 13:01:38 2023 -0700 add nvhpc compiler support (#719) Add detection of the NVIDIA nvhpc compiler (`nvc`) in `configure`, and adjust some warning options in `config.mk`. Currently, no specific options for `nvc` have been added in the relevant configurations so it may not be usable without further tweaks. commit 0873c0f6ed03fea321d1631b3d1a385a306aa797 Author: Devin Matthews Date: Sun May 7 14:03:19 2023 -0500 Consolidate INSERT_ macro sets via variadic macros. (#744) Details: - Consolidated INSERT_GENTFUNC_* (and corresponding GENTPROT) macro sets using variadic macros (__VA_ARGS__), which means we no longer need a different INSERT_ macro for each possible number of arguments the macro might take. This change seems reasonable given that variadic macros are a standard C99 feature and widely supported. I took care not to use variadic macros where 0 variadic arguments are expected since that is a non-standard extension. - Added pre-typecast parentheses to arithmetic expressions in printf() statements in bli_thread_range_tlb.c. commit ef9d3e6675320a53e7cb477c16b01388e708b1da Author: h-vetinari Date: Sun May 7 04:59:35 2023 +1100 Added missing #include for Windows. (#747) Details: - This commit fixes issue #746, in which the _access() function (called from within blastest/f2c/open.c) is undeclared when compiling on Windows with clang 16. commit 6fd9aabb03d172a792a7eeb106c7d965cf038421 Author: Devin Matthews Date: Fri May 5 14:22:52 2023 -0500 Fix bug in detecting Fortran compiler vendor (#745) `FC` was used instead of `found_fc`. commit 8215b02f99aa77ecc7d813508c247565115319d7 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Wed Apr 12 12:59:27 2023 -0500 Apply #738 to make_defs.mk of RISC-V subconfigs. (#740) Details: - PR #738 -- which moved -fPIC flag insertion responsibilities from common.mk to the subconfigs' individual make_defs.mk files -- was merged shortly before the introduction of new RISC-V subconfigs in #693. This commit brings those RISC-V subconfigs up to date with the new -fPIC conventions. commit 6b38c5ac07a2a27738674784e58aa699bf895447 Author: angsch <17718454+angsch@users.noreply.github.com> Date: Tue Apr 11 19:27:43 2023 +0200 Add RISC-V target (#693) Details: - There are four RISC-V base configurations: 'rv32i', 'rv32iv', 'rv64i', and 'rv64iv', namely the 32-bit and 64-bit implementations with and without the 'V' vector extension. Additional extensions such as 'M' (multiplication), 'A' (atomics), 'F' ('float' hardware support), 'D' ('double' hardware support), and 'C' (compressed-length instructions), are automatically used when available. If they are not available, then software equivalents (e.g., softfloat and -latomic) are used. - './configure auto' can be invoked on a RISC-V build platform, and will automatically detect RISC-V CPU extensions through the RISC-V C API: https://github.com/riscv-non-isa/riscv-c-api-doc/blob/master/riscv-c-api.md - The assembly kernels assume the presence of the vector extension RVV 1.0. - It is possible to build 'rv[32,64]iv' for any value of VLEN. However, if VLEN < 128, the targets will fall back to the generic kernels and blocksizes. - The vector microkernels are vector-length agnostic and work with every VLEN >=128, but are expected to work best with smaller vector lengths, i.e., VLEN <= 512. - The assembly kernels cover column major storage (rs_c == 1). - The blocksizes aim at being a good generic choice for out-of-order cores. They are not tuned to a specific RISC-V HPC core. - The vector kernels have been tested using vlen={128,256,512}. - The single- and double-precision assembly code routines for 'sgemm' and 'dgemm', or for 'cgemm' and 'zgemm', are combined in their RISC-V vector assembly source code, and are differentiated only with macros. - The XLEN=32 and XLEN=64 versions of the RISC-V assembly code are identical, except that callee-saved registers are saved and restored differently. There are RISC-V assembly code #include files for handling the saving and restoring of callee-saved registers, and they are future-proof if ever XLEN=128. - Multiplications, such as computing array strides and offsets, are performed in C, and later passed to the RISC-V assembly kernels. This is so that the compiler can determine whether the 'M' (multiply) extension is available and use multiplication instructions, or call library helper functions instead. - A new macro called bli_static_assert() has been added to perform static assertions at compile-time, regardless of the C/C++ dialect of the compiler. The original motivation of this was to ensure that calling RISC-V assembly kernels would not silently truncate arguments of type 'dim_t' or 'inc_t' (so-called "narrowing conversions"). - RISC-V CI tests have been added to Travis CI, using the riscv-gnu-toolchain cross-compiler, and qemu simulator. - Thanks to Lee Killough for collaborating on this commit. commit 593d01761910af6a9a16ee0ac097142732f73c29 Author: Field G. Van Zee Date: Sat Apr 8 16:44:16 2023 -0500 CREDITS file update. commit 259f68479671bbaf9c5986759aaa0004f9b05a24 Author: Field G. Van Zee Date: Fri Apr 7 16:11:34 2023 -0500 CREDITS file update. Details: - Added attributions associated with commits: - 98d4678 9b1beec: @bartoldeman - 2b05948 059f151: @ct-clmsn - Reordered attirubtion for @decandia50. commit aea8e1d9243631635ca788d5e14f0f29328e637d Author: Field G. Van Zee Date: Mon Apr 3 12:17:51 2023 -0500 Optionally disable thread-local storage. (#735) Details: - Implemented a new configure option, --disable-tls, which allows the user to optionally disable the use of thread-local storage qualifiers on static variables in BLIS. This option will rarely be needed, but in some situations may allow BLIS to compile when TLS is unavailable. Thanks to Nick Knight for suggesting this option. - Unlike the --disable-system option, --disable-tls does not forcibly disable threading. Instead, warnings of the possible consequences of using threading with TLS disabled are added to: - the output of './configure --help'; - the output of 'configure' the --disable-tls option is parsed; - the informational header output by the testsuite. Thanks to Minh Quan Ho for suggesting these warnings. - Modified frame/include/bli_lang_defs.h so that BLIS_THREAD_LOCAL is defined to nothing when BLIS_ENABLE_TLS is not defined. - Defined bli_info_get_enable_tls(), which returns whether the cpp macro BLIS_ENABLE_TLS was defined. - Edited --disable-system configure status output for clarity. - Whitespace updates. commit 3f1432abe75cc306ef90a04381d7e0d8739fded8 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Mon Apr 3 12:10:59 2023 -0500 Add output.testsuite to .gitignore (#736) Details: - Added `output.testsuite` to .gitignore since it was previously not being matched by `output.testsuite.*`. commit 38fc5237520a2f20914a9de8bb14d5999009b3fb Author: Field G. Van Zee Date: Thu Mar 30 17:30:07 2023 -0500 Added mm_algorithm pdf files (bp and pb). Details: - Added PDF versions of the PowerPoint files added in 17cd260. commit 17cd260cb504b2f3997c32daec77f4c828fbb32b Author: Field G. Van Zee Date: Wed Mar 29 21:47:12 2023 -0500 Added mm_algorithm pptx files (bp and pb). Details: - Added two PowerPoint files that contain slides depicting the classic Goto algorithm for matrix multiplication as well as its sister "panel-block" algorithm. These files reside in docs/diagrams. commit 9d778e0f7c94d8752dd578101e4fc6893a1f54ef Author: Field G. Van Zee Date: Wed Mar 29 17:36:49 2023 -0500 Move -fPIC insertion to subconfigs' make_defs.mk. (#738) * Move -fPIC insertion to subconfigs' make_defs.mk. Details: - Previously, common.mk was appending -fPIC to the CPICFLAGS variables set within the various subconfigurations' make_defs.mk files. This seemed somewhat unintuitive, and so now the -fPIC flag is assigned to the various subconfigs' CPICFLAGS variables in the respective make_defs.mk files. - This also commit changes the logic in common.mk so that instead of appending, the variable is overwritten, but now *only* in the case of Windows (since apparently -fPIC needs to be omitted there). Thanks to Nick Knight for catching and reporting this weirdness. commit 04090df01175477394d1e73af2e5769751d47cd6 Author: Field G. Van Zee Date: Mon Mar 27 14:13:10 2023 -0500 Fixed compile errors with `BLIS_DISABLE_BLAS_DEFS`. (#730) * Fixed compile errors with BLIS_DISABLE_BLAS_DEFS. Details: - This commit fixes a compile-time error related to the type definition (prototype) of dsdot_() when BLIS_DISABLE_BLAS_DEFS is defined by the application (or the configuration), which is actually a symptom of a larger design issue when disabling BLAS prototypes. The macro was intended to allow applications to bring their own BLAS prototypes and suppress the inclusion of duplicate (or possibly conflicting) prototypes within blis.h. However, prototypes are still needed during compilation even if they are ultimately omitted from blis.h. The problem is that almost every source file in BLIS--including the BLAS compatibility layer--only includes one header (blis.h), and if we were to #include a new header in the BLAS source files (to isolate only the BLAS prototypes), we would also have to make the build system aware of the location of those headers. Thanks to Edward Smyth of AMD for reporting this issue. - The solution I settled upon was to remove all cpp guards from all BLAS headers (by changing them to #if 1, for easy search-and-replace anchoring in the future if we ever need to re-insert guards) and modifying bli_blas.h so that the BLAS prototypes are #included if either (a) BLIS_ENABLE_BLAS_DEFS is defined, or (b) BLIS_ENABLE_BLAS_DEFS is *not* defined but BLIS_IS_BUILDING_LIBRARY *is* defined. (Thanks to Devin Matthews for steering me away from an inferior solution.) - This commit also spins off the actual BLAS prototypes/definitions to a separate file, bli_blas_defs.h. - CREDITS file update. commit 5f841307f668f65b7ed5a479bd8374d2581208cf Author: Field G. Van Zee Date: Fri Mar 24 20:05:13 2023 -0500 Omit -fPIC if shared library build is disabled. (#732) Details: - Updated common.mk so that when --disable-shared option is given to configure: 1. The -fPIC compiler flag is omitted from the individual configuration family members' CPICFLAGS variables (which are initialized in each subconfig's make_defs.mk file); and 2. The BUILD_SYMFLAGS variable, which contains compiler flags needed to control the symbol export behavior, is left blank. - The net result of these changes is that flags specific to shared library builds are only used when a shared library is actually scheduled to be built. Thanks to Nick Knight for reporting this issue. - CREDITS file update. commit 72c37eb80f964b7840377076e5009aec5b29d320 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Thu Mar 23 16:01:55 2023 -0500 Updated configure to pass all shellcheck checks. (#729) Details: - Modified configure so that it passes all 'shellcheck' checks, disabling ones which we violate but which are just stylistic, or are special cases in our code. - Miscellaneous other minor changes, such as rearranged redirections in long sed/perl pipes to look more natural. - Whitespace tweaks. commit 60f36347c16e6336215cd52b4e5f3c0f96e7c253 Author: Field G. Van Zee Date: Wed Feb 22 20:37:30 2023 -0600 Fixed bugs in scal2v ref kernel when alpha == 1. (#728) Details: - Fixed a typo bug in ref_kernels/1/bli_scal2v_ref.c where the conditional that was supposed to be checking for cases when alpha is equal to 1.0 (so that copyv could be used instead of scal2v) was instead erroneously comparing alpha against 0.0. - Fixed another bug in the same function whereby BLIS_NO_CONJUGATE was erroneously being passed into copyv instead of the kernel's conjx parameter. This second bug was inert, however, due to the first bug since the "alpha == 0.0" case was already being handled, resulting in the code block never executing. commit fab18dca46618799bb0b4f652820b33d36a5d4d4 Author: Field G. Van Zee Date: Wed Feb 22 16:50:00 2023 -0600 Use 'void*' datatypes in kernel APIs. (#727) Details: - Migrated all kernel APIs to use void* pointers instead of float*, double*, scomplex*, and dcomplex* pointers. This allows us to define many fewer kernel function pointer types, which also makes it much easier to know which function pointer type to use at any given time. (For example, whereas before there was ?axpyv_ker_ft, ?axpyv_ker_vft, and axpyv_ker_vft, now there is just axpyv_ker_ft, which is equivalent so what axpyv_ker_vft used to be.) - Refactored how kernel function prototypes and kernel function types are defined so as to reduce redundant code. Specifically, the function signatures (excluding cntx_t* and, in the case of level-3 microkernels, auxinfo_t*) are defined in new headers named, for example, bli_l1v_ker_params.h. Those signatures are reused via macro instantiation when defining both kernel prototypes and kernel function types. This will hopefully make it a little easier to update, add, and manage kernel APIs going forward. - Updated all reference kernels according to the aforementioned switch to void* pointers. - Updated all optimzied kernels according to the aforementioned switch to void* pointers. This sometimes required renaming variables, inserting typecasting so that pointer arithmetic could continue to function as intended, and related tweaks. - Updated sandbox/gemmlike according to the aforementioned switch to void* pointers. - Renamed: - frame/1/bli_l1v_ft_ker.h -> frame/1/bli_l1v_ker_ft.h - frame/1f/bli_l1f_ft_ker.h -> frame/1f/bli_l1f_ker_ft.h - frame/1m/bli_l1m_ft_ker.h -> frame/1m/bli_l1m_ker_ft.h - frame/3/bli_l1m_ft_ukr.h -> frame/3/bli_l1m_ukr_ft.h - frame/3/bli_l3_sup_ft_ker.h -> frame/3/bli_l3_sup_ker_ft.h to better align with naming of neighboring files. - Added the missing "void* params" argument to bli_?packm_struc_cxk() in frame/1m/packm/bli_packm_struc_cxk.c. This argument is being passed into the function from bli_packm_blk_var1(), but wasn't being "caught" by the function definition itself. The function prototype for bli_?packm_struc_cxk() also needed updating. - Reordered the last two parameters in bli_?packm_struc_cxk(). (Previously, the "void* params" was passed in after the "const cntx_t* cntx", although because of the above bug the params argument wasn't actually present in the function definition.) commit 93c63d1f469c4650df082d0fa2f29c46db0e25f5 Author: Field G. Van Zee Date: Mon Feb 20 11:14:23 2023 -0600 Use 'const' pointers in kernel APIs. (#722) Details: - Qualified all input-only data pointers in the various kernel APIs with the 'const' keyword while also removing 'restrict' from those kernel APIs. (Use of 'restrict' was maintained in kernel implementations, where appropriate.) This affected the function pointer types defined for all of the kernels, their prototypes, and the reference and optimized kernel definitions' signatures. - Templatized the definitions of copys_mxn and xpbys_mxn static inline functions. - Minor whitespace and style changes (e.g. combining local variable declaration and initialization into a single statement). - Removed some unused kernel code left in 'old' directories. - Thanks to Nisanth M P for helping to validate changes to the power10 microkernels. commit 4e18cd34f909c5045597f411340ede3a5e0bc5e1 Author: RuQing Xu Date: Sun Feb 19 04:18:41 2023 +0900 Restored ArmSVE general storage case. (#708) Details: - Restored general storage case in armsve kernels. - Reason for doing this: Though real `g`-storage is difficult to speedup, `g`-codepath here can provide a good support for transposed-storage. i.e. at least good for `GEMM_UKR_SETUP_CT_AMBI`. - By experience, this solution is only *a little* slower than in-reg transpose. Plus in-reg transpose is only possible for a fixed VL in our case. commit 0ba6e9eafb1e667373d9dbc2aa045557921f33e2 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Sat Feb 18 13:15:42 2023 -0600 Refined emacs handling of indentation. (#717) Details: - This refines the emacs autoformatting to be better in line with contribution guidelines. - Removed a stray shebang in a .mk file which confuses emacs about the file mode, which should be makefile-mode. (emacs also removes stray whitespace at the ends of lines.) commit 059f15105b1643fe56084f883c22b3cadf368b39 Author: ct-clmsn Date: Sat Feb 18 14:13:23 2023 -0500 Updated hpx namespace for make_count_shape. (#725) Details: - The hpx namespace for *counting_shape changed. This PR updates the use of counting_shape in blis to comply with the change in hpx. - Co-authored-by: ctaylor commit 0b421eff130b5c896edcc09e7358d18564d177e9 Author: Field G. Van Zee Date: Sat Feb 18 13:11:41 2023 -0600 Added an 'arm64' entry to `.travis.yml`. (#726) Details: - Added a new 'arm64' entry to the .travis.yml file in an attempt to get Travis CI to compile both NEON and SVE kernels, even if only NEON kernels are exercised in the testing. With this new 'arm64' entry, the 'cortexa57' entry becomes redundant and may be removed. Thanks to RuQing Xu for this suggestion. - Previously, the macro BLIS_SIMD_MAX_SIZE was *not* being set in bli_kernels_arm64.h, which meant that the default value of 64 was being used. This caused a runtime consistency check to fail in bli_gks.c (in Travis CI), one which requires that mr * nr * dt_size > BLIS_STACK_BUF_MAX_SIZE for all datatype sizes dt_size, where BLIS_STACK_BUF_MAX_SIZE is defined as BLIS_SIMD_MAX_NUM_REGISTERS * BLIS_SIMD_MAX_SIZE * 2 This commit increases BLIS_SIMD_MAX_SIZE to 128 for the 'arm64' configuration, thus overriding the default and (hopefully) avoiding the aforementioned consistency check failures. - Appended '|| cat ./output.testsuite' to all 'make' commands in travis/do_testsuite.sh. Thanks to RuQing Xu for this suggestion. - Whitespace changes. commit b1d3fc7e5b0927086e336a23f16ea59aa3611ccb Author: Field G. Van Zee Date: Fri Feb 10 15:34:47 2023 -0600 Redirect grep stderr to /dev/null. (#723) Details: - In common.mk, added a redirection of stderr to /dev/null for the grep command being used to gather a list of header files #included from bli_cntx_ref.c. The redirection is desirable because as of grep 3.8, regular expressions with "stray" backslashes trigger warnings [1]. But removing the backslash seems to break the BLIS build system when using pre-3.8 versions of grep, so this seems to be easiest way to satisfy the BLIS build system for both pre- and post-3.8 grep environments. [1] https://lists.gnu.org/archive/html/info-gnu/2022-09/msg00001.html commit e3d352f1fcc93e6a46fde1aa4a7f0a18fb27bd42 Author: Nisanth M P Date: Wed Feb 8 06:11:41 2023 +0530 Added runtime selection of 'power' config family. (#718) Details: - Created a 'power' umbrella configuration family, which, when targeted at configure-time, will build both 'power9' and 'power10' subconfigs. (With this feature, a BLIS shared library could be compiled on a power9 system and run on power10 and vice-versa. Unoptimised code will execute if it is linked and run on any other generic system.) - This new configuration family will only work with gcc, since that is the only compiler supported by both power9 and power10 subconfigs in BLIS. - Documented power9 and power10 as supported microarchitectures in the docs/HardwareSupport.md document. commit e730c685d09336b3bd09e86c94330c4eba967f3e Author: Field G. Van Zee Date: Mon Feb 6 15:31:54 2023 -0600 Define `BLIS_VERSION_STRING` in `blis.h`. (#720) Details: - Previously, the version string was communicated from configure to config.mk (via the config.mk.in template), where it was included via the top-level Makefile, where it was then used to define the preprocessor macro BLIS_VERSION_STRING via a command line argument to the compiler (via -D). This macro is then used within bli_info.c to initialize a static string which can then be queried via the bli_info_get_version_str() function. However, there are some applications that may find utility in being able to access the version string by inspecting the monolithic (flattened) blis.h header file that is created at compile time and installed alongside the library. This commit moves the definition of BLIS_VERSION_STRING into bli_config.h (via the bli_config.h.in template) so that it is embedded in blis.h. The version string is now available in three places: - the static/shared library, which is installed in the 'lib' subdirectory of the install prefix (query-able via the bli_info_get_version_str() function); - the config.mk makefile fragment, which is installed in the 'share' subdirectory of the install prefix (in the VERSION variable); - the blis.h header file, which is installed in the 'include' subdirectory of the install prefix (via the BLIS_VERSION_STRING macro constant). Thanks to Mohsen Aznaveh and Tim Davis for providing the idea for this change. - CREDITS file update. commit dc5d00a6ce0350cd82859d8c24f23d98f205d8db Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Fri Jan 27 17:36:47 2023 -0600 Typecast printf() args to avoid compiler warnings. (#716) Details: - In bli_thread_range_tlb.c, typecast integer arguments passed to printf() -- which are typically disabled unless debugging -- to type "long" to guarantee a match to the "%ld" format specifiers used in those calls. This avoids spurious warnings with certain compilers in certain toolchain environments, such as 32-bit RISC-V (rv32iv). commit ecbcf4008815035c695822fcaf106477debff89a Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Wed Jan 18 20:35:50 2023 -0600 Use here-document for 'configure --help' output. (#714) Details: - Changed the configure script function that outputs "--help" text to do so via so-called "here-document" syntax for improved readability and maintainability. The change eliminates hundreds of echo statements and makes it easier to change existing configure options' help text, along with other benefits such as eliminating the need to escape double- quote characters ("). commit c334ec278f5e2a101625629b2e13bbf1b38dede5 Author: Devin Matthews Date: Wed Jan 18 13:10:19 2023 -0600 Merge tlb- and slab/rr-specific gemm macrokernels. (#711) Details: - Merged the tlb-specific gemm macrokernel (_var2b) with the slab/rr- specific one (var2) so that a single function can be compiled with either tlb or slab/rr support, depending on the value of the BLIS_ENABLE_JRIR_TLB, _SLAB, and _RR. This is done by incorporating information from both approaches: the start/end/inc for the JR and IR loops from slab or rr partitioning; and the number of assigned microtiles, plus the starting IR dimension offset for all iterations after the first (ir_next). With these changes, slab, rr, and tlb can all be parameterized by initializing a similar set of variables prior to the jr loop. - Removed the wrap-around logic that sets the "b_next" field of the auxinfo_t struct, which executes during the last IR iteration of the last JR iteration. The potential benefit of this code is so minor (and hinges on the microkernel making use of the b_next field) that it's arguably not worth including. The code also does the wrong thing for some threads whenever JR_NT > 1, since only thread 0 (in the JR group) would even compute with the first micropanel of B. - Re-expressed the definition of bli_is_last_iter_slrr so that slab and tlb use the same code rather than rr and tlb. - Adjusted the initialization of the gemm control tree accordingly. commit 5793a77937aee9847a5692c8e44b36a6380800a1 Author: HarshDave12 <122850830+HarshDave12@users.noreply.github.com> Date: Tue Jan 17 21:55:02 2023 +0530 Fixed mis-mapped instruction for VEXTRACTF64X2. (#713) Details: - This commit fixes a typo in the macro definition for the extended inline assembly macro VEXTRACTF64X2 in bli_x86_asm_macros.h. The macro was previously defined (incorrectly) in terms of the vextractf64x4 instruction rather than vextractf64x2. - CREDITS file update. commit 16d2e9ea9ca0853197b416eba701b840a8587bca Author: Field G. Van Zee Date: Fri Jan 13 20:03:01 2023 -0600 Defined lt, lte, gt, gte + misc. other updates. (#712) Details: - Changed invertsc operation to be a non-destructive operation; that is, it now takes separate input and output operands. This change applies to both the object and typed APIs. - Defined an alternative square root operation, sqrtrsc, which, when operating on complex scalars, assumes the imaginary part of the input to be zero. - Changed the semantics of addm, subm, copym, axpym, scal2m, and xpbym so that when the source matrix has an implicit unit diagonal, the operation leaves the diagonal of the destination matrix untouched. Previously, the operations would interpret an implicit unit diagonal on the source matrix as a request to manifest the unit diagonal *explicitly* on output (either as something to copy in the case of copym, or something to compute with in the cases of addm, subm, axpym, scal2m, and xpbym). It turns out that this behavior was too cute by half and could cause unintended headaches for practical use cases. (This change in behavior also required small modifications to the trmv and trsv testsuite modules so that they would properly test matrices with unit diagonals.) - Added missing dependencies for copym to gemv, ger, hemv, trmv, and trsv testsuite modules. - Implemented level-0-like ltsc, ltesc, gtsc, gtesc operations in frame/util, which use lt, lte, gt, and gte level-0 scalar macros. - Trivial variable rename in bli_part.c to harmonize with other variable naming conventions. commit 9a366b14fe52c469f4664ef5dd93d85be8d97baa Author: Field G. Van Zee Date: Thu Jan 12 13:07:22 2023 -0600 Implement cntx_t pointer caching in gks. (#709) Details: - Refactored the gks cntx_t query functions so that: (1) there is a clearer pattern of similarity between functions that query a native context and those that query its induced (1m) counterpart; and (2) queried cntx_t pointers (for both native and induced cntx_t pointers) are cached (by default), or deep-queried upon each invocation, depending on whether cpp macro BLIS_ENABLE_GKS_CACHING is defined. - Refactored query-related functions in bli_arch.c to cache the queried arch_t value (by default), or deep-query the arch_t value upon each invocation, depending on whether cpp macro BLIS_ENABLE_GKS_CACHING is defined. - Tweaked the behavior of bli_gks_query_ind_cntx_impl() (formerly named bli_gks_query_ind_cntx()) so that the induced method cntx_t struct is repopulated each time the function is called. (It is still only allocated once on first call.) This was mostly done in preparation for some future in which the arch_t value might change at runtime. In such a scenario, the induced method context would need to be recalculated any time the native context changes. - Added preprocessor logic to bli_config_macro_defs.h to handle enabling or disabling of cntx_t pointer caching (via BLIS_ENABLE_GKS_CACHING). - For now, cntx_t pointer caching is enabled by default and does not correspond to any official configure option. Disabling can be done by inserting a #define for BLIS_DISABLE_GKS_CACHING into the appropriate bli_family_*.h header file within the configuration of interest. - Thanks to Harihara Sudhan S (AMD) for suggesting that cntxt_t pointers (and not just arch_t values) be cached. - Comment updates. commit b895ec9f1f66fb93972589c06bff171337153a31 Author: Nisanth M P Date: Wed Jan 11 09:02:32 2023 +0530 Fixing type-mismatch errors in power10 sandbox (#701) Details: - This commit fixes a mismatch between the function type signature of bli_gemm_ex() required by BLIS and the version of the function defined within the power10 sandbox. It also performs typecasting upon calling bli_gemm_front() to attain type consistency with the type signature defined by BLIS for bli_gemm_front(). commit 38d88d5c131253066cad4f98eea06fa9299cae3b Author: Devin Matthews Date: Tue Jan 10 21:24:58 2023 -0600 Define new global scalar (obj_t) constants. (#703) Details: - This commit defines the following new global scalar constants: - BLIS_ONE_I: This constant encodes the imaginary unit. - BLIS_MINUS_ONE_I: This constant encodes the negative imaginary unit. - BLIS_NAN: This constant encodes a not-a-number value. Both real and imaginary parts are set to NaN for complex datatypes. commit cdb22b8ffa5b31a0c16ac1a7bcecefeb5216f669 Author: Nisanth M P Date: Wed Jan 11 08:50:57 2023 +0530 Disable power10 kernels other than sgemm, dgemm. (#705) Details: - There is a power10 sandbox which uses microkernels for datatypes other than float and double (or scomplex/dcomplex). In a regular power10- configured build (that is, with the sandbox disabled), there were compile errors for some of these other non-sgemm/non-dgemm microkernels. This commit protects those kernels with a new cpp macro guard (which is defined in sandbox/power10/bli_sandbox.h) that prevents that kernel code from being compiled for normal, non-sandbox power10 builds. commit d220f9c436c0dae409974724d42ab6c52f12a726 Author: Nisanth M P Date: Wed Jan 11 08:43:03 2023 +0530 Fix k = 0 edge case in power10 microkernels (#706) Details: - When power10 sgemm and dgemm microkernels are called with k = 0, they become caught in infinite loops and segfault. This is fixed now via an early exit in the case of k = 0. commit 2e1ba9d13c23a06a7b6f8bd326af428f7ea68c31 Author: Field G. Van Zee Date: Tue Jan 10 21:05:54 2023 -0600 Tile-level partitioning in jr/ir loops (ex-trsm). (#695) Details: - Reimplemented parallelization of the JR loop in gemmt (which is recycled for herk, her2k, syrk, and syr2k). Previously, the rectangular region of the current MC x NC panel of C would be parallelized separately from from the diagonal region of that same submatrix, with the rectangular portion being assigned to threads via slab or round-robin (rr) partitioning (as determined at configure- time) and the diagonal region being assigned via round-robin. This approach did not work well when extracting lots of parallelism from the JR loop and was often suboptimal even for smaller degrees of parallelism. This commit implements tile-level load balancing (tlb) in which the IR loop is effectively subjugated in service of more equitably dividing work in the JR loop. This approach is especially potent for certain situations where the diagonal region of the MC x NR panel of C are significant relative to the entire region. However, it also seems to benefit many problem sizes of other level-3 operations (excluding trsm, which has an inherent algorithmic dependency in the IR loop that prevents the application of tlb). For now, tlb is implemented as _var2b.c macrokernels for gemm (which forms the basis for gemm, hemm, and symm), gemmt (which forms the basis of herk, her2k, syrk, and syr2k), and trmm (which forms the basis of trmm and trmm3). Which function pointers (_var2() or _var2b()) are embedded in the control tree will depend on whether the BLIS_ENABLE_JRIR_TLB cpp macro is defined, which is controlled by the value passed to the existing --thread-part-jrir=METHOD (or -r METHOD) configure option. This script adds 'tlb' as a valid option alongside the previously supported values of 'slab' and 'rr'. ('slab' is still the default.) Thanks to Leick Robinson for abstractly inspiring this work, and to Minh Quan Ho for inquiring (in PR #562, and before that in Issue #437) about the possibility of improved load balance in macrokernel loops, and even prototyping what it might look like, long before I fully understood the problem. - In bli_thread_range_weighted_sub(), tweaked the the way we compute the area of the current MC x NC trapezoidal panel of C by better taking into account the microtile structure along the diagonal. Previously, it was an underestimate, as it assumed MR = NR = 1 (that is, it assumed that the microtile column of C that overlapped with microtiles exactly coincided with the diagonal). Now, we only assume MR = NR. This is still a slight underestimate when MR != NR, so the additional area is scaled by 1.5 in a hackish attempt to compensate for this, as well as other additional effects that are difficult to model (such as the increased cost of writing to temporary tiles before finally updating C). The net effect of this better estimation of the trapezoidal area should be (on average) slightly larger regions assigned to threads that have little or no overlap with the diagonal region (and correspondingly slightly smaller regions in the diagonal region), which we expect will lead to slightly better load balancing in most situations. - Spun off the contents of bli_thread.[ch] that relate to computing thread ranges into one of three source/header file pairs: - bli_thread_range.[ch], which define functions that are not specific to the jr/ir loops; - bli_thread_range_slab_rr.[ch], which define functions that implement slab or round-robin partitioning for the jr/ir loops; - bli_thread_range_tlb.[ch], which define functions that implement tlb for the jr/ir loops. - Fixed the computation of a_next in the last iteration of the IR loop in bli_gemmt_l_ker_var2(). Previously, it always "wrapped" back around to the first micropanel of the current MC x KC packed block of A. However, this is almost never actually the micropanel that is used next. A new macro, bli_gemmt_l_wrap_a_upanel(), computes a_next correctly, with a similarly named bli_gemmt_u_wrap_a_upanel() for use in the upper-stored case (which *does* actually always choose the first micropanel of A as its a_next at the end of the IR loop). - Removed adjustments for a_next/b_next (a2/b2) for the diagonal- intersecting case of gemmt_l_ker_var2() and the above-diagonal case of gemmt_u_ker_var2() since these cases will only coincide with the last iteration of the IR loop in very small problems. - Defined bli_is_last_iter_l() and bli_is_last_iter_u(), the latter of which explicitly considers whether the current microtile is the last tile that intersects the diagonal. (The former does the same, but the computation coincides with the original bli_is_last_iter().) These functions are now used in gemmt to test when a_next (or a2) should "wrap" (as discussed above). Also defined bli_is_last_iter_tlb_l() and bli_is_last_iter_tlb_u(), which are similar to the aforementioned functions but are used when employing tlb in gemmt. - Redefined macros in bli_packm_thrinfo.h, which test whether an iteration of work is assigned to a thread, as static inline functions in bli_param_macro_defs.h (and then deleted bli_packm_thrinfo.h). In the process of redefining these macros, I also renamed them from bli_packm_my_iter_rr/sl() to bli_is_my_iter_rr/sl(). - Renamed bli_thread_range_jrir_rr() -> bli_thread_range_rr() bli_thread_range_jrir_sl() -> bli_thread_range_sl() bli_thread_range_jrir() -> bli_thread_range_slrr() - Renamed bli_is_last_iter() -> bli_is_last_iter_slrr() - Defined bli_info_get_thread_jrir_tlb() and renamed: - bli_info_get_thread_part_jrir_slab() -> bli_info_get_thread_jrir_slab() - bli_info_get_thread_part_jrir_rr() -> bli_info_get_thread_jrir_rr() - Modified bli_rntm_set_ways_for_op() to redirect IR loop parallelism into the JR loop when tlb is enabled for non-trsm level-3 operations. - Added a sanity check to prevent bli_prune_unref_mparts() from being used on packed objects. This prohibition is necessary because the current implementation does not take into account the atomicity of packed micropanel widths relative to the diagonal of structured matrices. That is, the function prunes greedily without regard to whether doing so would prune off part of a micropanel *which has already been packed* and assigned to a thread for inclusion in the computation. - Further restricted early returns in bli_prune_unref_mparts() to situations where the primary matrix is not only of general structure but also dense (in terms of its uplo_t value). The addition of the matrix's dense-ness to the conditional is required because gemmt is somewhat unusual in that its C matrix has general structure but is marked as lower- or upper-stored via its uplo_t. By only checking for general structure, attempts to prune gemmt C matrices would incorrectly result in early returns, even though that operation effectively treats the matrix as symmetric (and stored in only one triangle). - Fixed a latent bug in bli_thread_range_rr() wherein incorrect ranges were computed when 1 < bf. Thankfully, this bug was not yet manifesting since all current invocations used bf == 1. - Fixed a latent bug in some unexercised code in bli_?gemmt_l_ker_var2() that would perform incorrect pruning of unreferenced regions above where the diagonal of a lower-stored matrix intersects the right edge. Thankfully, the bug was not harming anything since those unreferenced regions were being pruned prior to the macrokernel. - Rewrote slab/rr-based gemmt macrokernels so that they no longer carved C into rectangular and diagonal regions prior to parallelizing each separately. The new macrokernels use a unified loop structure where quadratic (slab) partitioning is used. - Updated all level-3 macrokernels to have a more uniform coding style, such as wrt combining variable declarations with initializations as well as the use of const. - Updated bls_l3_packm_var[123].c to use bli_thrinfo_n_way() and bli_thrinfo_work_id() instead of bli_thrinfo_num_threads() and bli_thrinfo_thread_id(), respectively. This change probably should have been included in aeb5f0c. - Removed old prototypes in bli_gemmt_var.h and bli_trmm_var.h that corresponded to functions that were removed in aeb5f0c. - Other very minor cleanups. - Comment updates. commit b6735ca26b9d459d9253795dc5841ae8de9e84c9 Author: Devin Matthews Date: Fri Jan 6 14:10:01 2023 -0600 Refactor structure awareness in packm_blk_var1.c. (#707) Details: - Factored some of the structure awareness out of the loop in bli_packm_blk_var1(). So instead of having a single loop with conditionals in the body to handle various kinds of structure (and stored/unstored submatrix placement), we now have a conditional branch to handle various structure/storage scenarios with a loop in each section. This change was originally motivated to choose slab or round- robin partitioning (in the context of triangular matrices) based on the structure of the entire block (or panel) being packed rather than each micropanel individually. Previously, the code would attempt to limit rr to the portion of the block that intersects the diagonal and use slab for the remainder. However, that approach was not well-thought out and in many situations this would lead to inferior load balancing when compared to using round-robin for the entire block (or panel). This commit has the added benefit of incurring less overhead during the packing process now that each of the new loops is simpler. commit f956b79922da412791e4c8b8b846b3aafc0a5ee0 Author: Field G. Van Zee Date: Sat Dec 31 20:18:08 2022 -0600 Switch to l3 sup decorator in gemmlike sandbox. (#704) Details: - Modified the gemmlike sandbox to call bli_l3_sup_thread_decorator() rather than a local analogue of that code. This reduces redundant logic and makes it easier for the sandbox to inherit future improvements to the framework's threading code. - Moved addon/gemmd to addon/old/gemmd. This code has fallen out of date and is taking too much effort to maintain. We will very likely reimplement it completely once future changes are made to the framework proper. commit 538150c5845ad903773ca797c740048174116aa4 Author: Field G. Van Zee Date: Sun Dec 25 22:28:09 2022 -0600 Applied race condition fix to sup thread decorator. Details: - Applied the race condition bugfix in commit 7d23dc2 to the corresponding sup code in bli_l3_sup_decor.c. Note that in the case of sup, the race condition would have only manifested when optional packing was enabled at runtime (typically via setting BLIS_PACK_A and/or BLIS_PACK_B environment variables). - Both the fix in this commit and the fix in 7d23dc2 address bugs that were introduced when the thrinfo_t trees/communicators were restructured in the October omnibus commit (aeb5f0c). commit 7d23dc2a064a371dc9883e2c2c7236a70912428c Author: Devin Matthews Date: Sun Dec 25 19:09:14 2022 -0600 Fix a race condition which manifested as incorrect results (rarely). (#702) The problem occurs when there are at least two teams of threads packing different parts of a matrix, and where each team has at least two threads; call them team A and team B. The problematic sequence is: 1. The chief of team A checks out a block B and broadcasts the pointer to its teammates. 2. Team A completely packs their data and perform a barrier amongst themselves. 3. Team A commences computing with the packed data. 4. The chief of team A finishes computing before its teammates, then calls bli_thrinfo_free on its thrinfo_t struct (which contains the mem_t object referencing the buffer B). This causes buffer B to be checked back in to the pba. 5. The chief of team B checks out the *same* block B that was just checked back in and broadcasts the pointer to its teammates. 6. DATA RACE: now the remaining threads of team A are reading *while* team B are writing to the same buffer B. If team A write new data before team B are done computing then an incorrect result is generated. The solution is to place a global barrier before the call to bli_thrinfo_free at the end of the computation. Co-authored-by: Field G. Van Zee commit 3accacf57d11e9b109339754f91bf22329b6cb6a Author: Field G. Van Zee Date: Fri Dec 16 10:26:33 2022 -0600 Skip 1m optimization when forcing hemm_l/symm_l. (#697) Details: - Fixed a bug in right-sided hemm when: - using the 1m method, - #defining BLIS_DISABLE_HEMM_RIGHT in the active subconfiguration, and - the storage of C matches the gemm microkernel IO preference PRIOR to the right-sidedness being detected and recast in terms of the left- side code path. It turns out that bli_gemm_ind_recast_1m_params() was applying its optimization (recasting a complex-domain macrokernel calling a 1m virtual microkernel to a real-domain macrokernel calling the real- domain microkernel) in situations in which it should not have. The optimization was silently assuming that the storage of C always matched that of the microkernel preference, since the front-end (in this case, bli_hemm_front()) would have already had a chance to transpose the operation to bring the two into agreement. However, by disabling right-sided hemm, we deprive BLIS of that flexibility (as a transposed left-sided case would necessarily have to become a right- sided case), and thus the assumption was no longer holding in all cases. Thanks to Nisanth M P for reporting this bug in Issue #621. - The aforementioned bug, and its bugfix, also apply to symm when BLIS_DISABLE_SYMM_RIGHT is defined. - Comment updates. - CREDITS file update. commit 4833ba224eba54df3f349bcb7e188bcc53442449 Author: Field G. Van Zee Date: Mon Dec 12 20:26:02 2022 -0600 Fixed perf of mt sup with packing, and mt gemmlike. (#696) Details: - Brought the gemmsup code path up to date relative to the latest thrinfo_t semantics introduced in the October Omnibus commit (aeb5f0c). This was done by passing the prenode (instead of the current node) into the packm variant within bli_l3_sup_packm.c as well as creating the prenodes and attaching them to the thrinfo_t tree in bli_l3_sup_thrinfo_create(). These changes erase the performance degradation introduced in the omnibus when running multithreaded sup with optional packing enabled. Special thanks to Devin Matthews for sussing out this fix in short order. - Fixed the gemmlike sandbox in a manner similar to that of sup with packing, described above. This also involved passing the prenode into the local gemmlike packm variant. (Recall that gemmlike recycles the use of bli_l3_sup_thrinfo_create(), so it automatically inherits that part of the sup fix described above.) - Updated bls_l3_packm_var[123].c to use bli_thrinfo_n_way() and bli_thrinfo_work_id() instead of bli_thrinfo_num_threads() and bli_thrinfo_thread_id(), respectively. commit db10dd8e11a12d85017f84455558a82c0093b1da Author: Field G. Van Zee Date: Tue Nov 29 19:10:31 2022 -0600 Fixed _gemm_small() prototype; disabled gemm_small. Details: - Fixed a mismatch between the prototype for bli_gemm_small() in bli_gemm_front.h and the actual definition of bli_gemm_small() in kernels/zen/3/bli_gemm_small.c. The former was erroneously declaring the cntl_t* argument as 'const'. Thanks to Jeff Diamond for reporting this issue. - Commented out BLIS_ENABLE_SMALL_MATRIX, BLIS_ENABLE_SMALL_MATRIX_TRSM macro definitions in config/zen3/bli_family_zen3.h. AMD's small matrix implementation should probably remain disabled in vanilla BLIS, at least for now. commit f0337b784d164ae505ca0e11277a1155680500d1 Author: Field G. Van Zee Date: Sun Nov 13 21:36:47 2022 -0600 Trival whitespace/comment tweaks. Details: - Trivial whitespace and comment changes, most of which ideally would have been part of the previous commit pertaining to HPX (2b05948). commit 2b05948ad2c9785bc53f376d53a7141cbc917447 Author: ct-clmsn Date: Sun Nov 13 17:40:22 2022 -0500 blis support for hpx (#682) Implement threading backend via HPX. HPX is an asynchronous many task runtime system used in high performance computing applications. The runtime implements the ISO C++ parallelism specification and provides a user-space thread implementation. This PR provides BLIS a thread backend implementation using HPX and resolves feature request #681. The configuration script, makefiles, and testsuite have been updated to support an HPX build option. The addition of HPX support provides other developers an exemplar for integrating other C++ threading backends into BLIS. Co-authored-by: ctaylor Co-authored-by: Devin Matthews commit e1ea25da43508925e33d4e57e420cfc0a9de793f Author: Field G. Van Zee Date: Fri Nov 11 12:07:51 2022 -0600 Fixed subtle barrier_fpa bug in bli_thrcomm.c. (#690) Details: - In bli_thrcommo.c, correctly initialize the BLIS_OPENMP element of the barrier function pointer array (barrier_fpa) to NULL when BLIS_ENABLE_OPENMP is *not* defined. Similarly, initialize the BLIS_POSIX element of barrier_fpa to NULL when BLIS_ENABLE_PTHREADS is not enabled. This bug was introduced in a1a5a9b and was likely the result of an incomplete edit. The effects of the bug would have likely manifested when querying a thrcomm_t that was initialized with a timpl_t value corresponding to a threading implementation that was omitted from the -t option at configure-time. commit dc6e5f3f5770074ba38554541b8b64711a68c084 Author: leekillough <15950023+leekillough@users.noreply.github.com> Date: Thu Nov 3 18:33:08 2022 -0500 Enhance emacs formatting of C files to remove trailing whitespace and ensure a newline at the end of file commit 713d078075a4a563a43d83fd0880ab5091c2e4a4 Author: Field G. Van Zee Date: Thu Nov 3 20:00:11 2022 -0500 Delete mpi_test garbage. (#689) Details: - tlrmchlsmth: "What even is this? No comments, no commit message, not used by anything. Trash." commit 8d813f7f12732d52c95570ae884d5defbfd19234 Author: Field G. Van Zee Date: Thu Nov 3 19:10:47 2022 -0500 Some decluttering of the top-level directory. Details: - Relocated 'mpi_test' directory to test/mpi_test. - Relocated 'so_version' and 'version' files from top-level directory to 'build' directory. - Updated build/bump-version.sh script to accommodate relocation of 'version' file to 'build' directory. - Updated configure script to accommodate relocation of 'so_version' file to 'build' directory. - Updated INSTALL file to replace pointers to blis-devel mailing list with a pointer to docs/Discord.md. - Updated RELEASING file to contain a reminder to consider whether the so_version file should be updated prior to the release. commit 6774bf08c92fc6983706a91bbb93b960e8eef285 Author: Lee Killough <15950023+leekillough@users.noreply.github.com> Date: Thu Nov 3 15:20:47 2022 -0500 Fix typo in configure --help text. (#686) Details: - Fixed a misspelling in the --help description for the --int-size (-i) configure option. commit 872898d817f35702e7678ff7f3eeff0f12e641f5 Author: Field G. Van Zee Date: Wed Nov 2 21:53:22 2022 -0500 Fixed trmm[3]/trsm performance bug in cf7d616. (#685) Details: - Fixed a performance bug in the packing of micropanels that intersect the diagonal of triangular matrices (i.e., those found in trmm, trmm3, and trsm). This bug was introduced in cf7d616 and stemmed from an ill-formed boolean conditional expression in bli_packm_blk_var1(). This conditional would chose when to use round-robin parallel work allocation, but checked for the triangularity of the submatrix being packed while failing also to check for whether the current micropanel actually intersected the diagonal. The net result of this bug was that *all* micropanels of a triangular matrix, no matter where the upanels resided within the matrix, were assigned to threads via a round-robin policy. This affected some microarchitectures and threading configurations much worse than others, but it seems that overall the effect was universally negative, likely because of the reduced spatial locality during the packing with round-robin. Thanks to Leick Robinson for his tireless efforts in helping track down this issue. commit edcc2f9940449f7d9cefcfc02159d27b013e7995 Author: Field G. Van Zee Date: Wed Nov 2 19:04:49 2022 -0500 Support --nosup, --sup configure options. (#684) Details: - Added --nosup and --sup as alternative ways of requesting that sup be disabled or enabled. These are analagous to --disable-sup-handling and --enable-sup-handling, respectively. (I got tired of typing out --disable-sup-handling and needed a shorthand notation.) - Tweaked message output by configure when sup is enable/disabled for clarity and specificity. - Whitespace changes. commit 5eea6ad9eb25f37685d1ae4ae08c73cd1daca297 Author: Field G. Van Zee Date: Wed Nov 2 17:07:54 2022 -0500 Add mention of Wilkinson Prize to README.md. (#683) Details: - Added blurbs and links to Wilkinson Prize to README.md. - Added mention of both Best Paper and Wilkinson Prizes to the top of README.md. - Other minor tweaks. commit 29f79f030e939969d4f3876c4fdaac7b0c5daa63 Author: Devin Matthews Date: Mon Oct 31 18:57:45 2022 -0500 Fixed performance bug caused by redundant packing. (#680) Details: - Fixed a performance bug whereby multiple threads were redundantly packing the same (rather than separate) micropanels. This bug was caused by different parts of the code using the num_threads/thread_id field of the thrinfo_t vs. the n_way/work_id fields. The fix was to standardize on the latter and provide a "fake" thrinfo_t sub-prenode in the thrinfo tree which consists of single-member thread teams. The single team with multiple threads node is still required since it and only it can be used to perform barriers and broadcasts (e.g. of the packed buffer pointer). commit aeb5f0cc19665456e990a7ffccdb09da2e3f504b Author: Devin Matthews Date: Thu Oct 27 12:39:11 2022 -0500 Omnibus PR - Oct 2023 (#678) Details: - This is an "omnibus" commit, consisting of multiple medium-sized commits that affect non-trivial aspects of BLIS. The major highlights: - Relocated the pba, sba pool (from the rntm_t), and mem_t (from the cntl_t) to the thrinfo_t object. This allows the rntm_t to be effectively const (although it is sometimes copied internally and modified to reflect different ways of parallelism). Moving the mem_t sets the stage for sharing a global control tree amongst all threads. - De-templatized the macrokernels for gemmt, trmm, and trsm to match the macrokernel for gemm, which has been de-templatized since 54fa28b. - Reimplemented bli_l3_determine_kc() by separating out the logic for adjusting KC based on MR/NR for triangular A and/or B into a new function, bli_l3_adjust_kc(). For now, this function is still called from bli_l3_determine_kc(), but in the future we plan to have it called once when constructing the control tree. - Refactored the level-3 thread decorator into two parts: - One part deals only with launching threads, each one calling a generic thread entry function. This code resides in frame/thread and constitutes the definition of bli_thread_launch(). Note that it is specific to the threading implementation (OpenMP, pthreads, single, etc.) - The other part deals with passing the matrix operands and related information into bli_thread_launch(). This is the "l3 decorator" and now resides in frame/3. It is agnostic to the threading implementation. - Modified the "level" of the thread control tree passed in at each operation. Previously, each operation (e.g. bli_gemm_blk_var1()) was passed in a communicator representing the active thread teams which would share the available work. Now, the *parent* thread comm is passed in. The operation then grabs the child comm and uses it to partition the work. The difference is in bli_trsm_blk_var1(), where there are now two children nodes for this single operation (i.e. the thread control tree is split one level above where the control tree is). The sub-prenode is used for the trsm subproblem while the normal sub-node is used for the gemm part. Importantly, the parent comm is used for the barrier between them. - Removed cntl_t* arguments from bli_*_front() functions. These will be added back in the future when the control tree's creation is moved so that it happens much sooner (provided that bli_*_front() have not been absorbed into their respective bli_*_ex() functions). - Renamed various bli_thread_*() query functions to bli_thrinfo_*(), for consistency. This includes _num_threads(), _thread_id(), _n_way(), _work_id(), _sba_pool(), _pba(), _mem(), _barrier(), _broadcast(), and _am_chief(). - Removed extraneous barrier from _blk_var3() of gemm and trsm. - Fixed a typo in bli_type_defs.h where BLIS_BLAS_INT_TYPE_SIZE was misspelled. commit c803b03e52a7a6997a8d304a8cfa9acf7c1c555b Author: Devin Matthews Date: Wed Oct 26 18:20:00 2022 -0500 Add check to disable armsve on Apple M1. commit 2dd692b710b6a9889f7ebdd7934a2108be5c5530 Author: Devin Matthews Date: Wed Oct 26 18:10:26 2022 -0500 Fix auto-detection of firestorm (Apple M1). commit 88105dbecf0f9dfbfa30215743346e8bd6afb971 Author: Field G. Van Zee Date: Fri Oct 21 15:16:12 2022 -0500 Added Discord documentation (#677) Details: - Added a docs/Discord.md markdown document that walks the reader through creating a Discord account, obtaining the invite link, and using the link to join the BLIS Discord server. - Updated README.md to reference the new Discord.md document in multiple places, including via the official Discord logo (used with explicit permission from representatives at Discord Inc.). commit 23f5b8df3e802a27bacd92571184ec57bbdfa646 Author: Field G. Van Zee Date: Mon Oct 17 20:21:21 2022 -0500 Shuffled checked properties in bli_l3_check.c. (#676) Details: - Added certain checks for matrix structure to the level-3 operations' _check() functions, and slightly reorganized existing checks. commit 9453e0f163503f64a290256b4be53d8882224863 Author: Field G. Van Zee Date: Mon Oct 3 19:46:20 2022 -0500 CREDITS file update. Details: - This attribution was intended to go in PR #647. commit 76a23bd8c33e161221891935a489df9a9fb9c8c0 Author: Devin Matthews Date: Mon Oct 3 15:55:07 2022 -0500 Reinstate sanity check in bli_pool_finalize. (#671) Details: - Added a reinit argument to bli_pool_finalize(). This bool will signal whether or not the function is being called from bli_pool_reinit(). If it is not being called from _reinit(), we can safely check to confirm that .top_index == 0 (i.e., all blocks have been checked in). But if it *is* being called from _reinit(), then that check will be skipped since one of the predicted use cases for bli_pool_reinit() anticipates that some blocks are (probably) checked out when the pool_t is reinitialized. - Updated existing invocations of bli_pool_finalize() to pass in either FALSE (from bli_apool_free_block() or bli_pba_finalize_pools()) or TRUE (from bli_pool_reinit()) for the new reinit argument. commit 63470b49e3b9b15e00a8f666e86ccd70c6005fe9 Author: Devin Matthews Date: Thu Sep 29 18:52:08 2022 -0500 Fix some bugs in bli_pool.c (#670) Details: - Add a check for premature pool exhaustion when checking in blocks via bli_pool_checkin_block(). This detects "double-free" and other bad conditions that don't necessarily result in a segfault. - Make sure to copy all block pointers when growing the pool size. Previously, checked-out block pointers (which are guaranteed to be set to NULL) were not being copied, leading to the presence of uninitialized data. commit 42d0e66318b186d25eeb215b40ce26115401ed8b Author: Devin Matthews Date: Thu Sep 29 17:38:02 2022 -0500 Add AddressSanitizer (-fsanitize=address) option. (#669) Details: - Added support for AddressSanitizer (ASan), a compiler-integrated memory error detector. The option (disabled by default) enables compiling and linking with the -fsanitize=address flag supported by clang, gcc, and probably others. This flag is employed during compilation of all BLIS source files *except* for optimized kernels, which are exempted because ASan usually requires an extra register, which violates the constraints for many gemm microkernels. - Minor whitespace, comment, ordering, and configure help text updates. commit b861c71b50c6d48cb07282f44aa9dddffc1f1b3f Author: Devin Matthews Date: Fri Sep 23 13:22:27 2022 -0500 Add consistent NaN/Inf handling in sumsqv. (#668) Details: - Changed sumsqv implementation as follows: - If there is a NaN (either real or imaginary), then return a sum of NaN and unit scale. - Else, if there is an Inf (either real or imaginary), then return a sum of +Inf and unit scale. - Otherwise behave as normal. commit ee81efc7887374c974a78bfb3e0865776b2f97a8 Author: Field G. Van Zee Date: Thu Sep 22 19:15:07 2022 -0500 Parameterized test/3 drivers via command line args. (#667) Details: - Rewrote the drivers in test/3, the Makefile, and the runme.sh script so that most of the important parameters, including parameter combo, datatype, storage combo, induced method, problem size range, dimension bindings, number of repeats, and alpha/beta values can be passed in via command line arguments. (Previously, most of these parameters were hard-coded into the driver source, except a few that were hard-coded into the Makefile.) If no argument is given for any particular option, it will be assigned a sane default. Either way, the values employed at runtime will be printed to stdout before the performance data in a section that is commented out with '%' characters (which is used by matlab and octave for comments), unless the -q option is given, in which case the driver will proceed quietly and output only performance data. Each driver also provides extensive help via the -h option, with the help text tailored for the operation in question (e.g. gemm, hemm, herk, etc.). In this help text, the driver reminds the user which implementation it was linked to (e.g. blis, openblas, vendor, eigen). Thanks to Jeff Diamond for suggesting this CLI-based reimagining of the test/3 drivers. - In the test/3 drivers: converted cpp macro string constants, as well as two string literals (for the opname and pc_str) used in each test driver, to global (or static) const char* strings, and replaced the use of strncpy() for storing the results of the command line argument parsing with pointer copies from the corresponding strings in argv. This works because the argv array is guaranteed by the C99 standard to persist throughout the life of the program. This new approach uses less storage and executes faster. Thanks to Minh Quan Ho for recommending this change. - Renamed the IMP_STR cpp macro that gets defined on the command line, via the test/3/Makefile, to IMPL_STR. - Updated runme.sh to set the problem size ranges for single-threaded and multithreaded execution independently from one another, as well as on a per-system basis. - Added a 'quiet' variable to runme.sh that can easily toggle quiet mode for the test drivers' output. - Very minor typecast fix in call to bli_getopt() in bli_utils.c. - In bli_getopt(), changed the nextchar variable from being a local static variable to a field of the getopt_t state struct. (Not sure why it was ever declared static to begin with.) - Other minor changes to bli_getopt() to accommodate the rewritten test drivers' command line parsing needs. commit 036a4f9d822df25a76a653e70be76fb02284d3d3 Author: Field G. Van Zee Date: Thu Sep 22 18:36:50 2022 -0500 Refactored some rntm_t management code. (#666) Details: - Separated the "sanitizing" code from the auto-factorization code in bli_rntm_set_ways_from_rntm() and _rntm_set_ways_from_rntm_sup(). The santizing code now resides in bli_rntm_sanitize() while the factorization code resides in bli_rntm_factorize() and bli_rntm_factorize_sup(). (There are two different functions because the conventional and sup factorization codes are currently somewhat different.) Also note that the factorization code now relies on the .auto_factor field to have already been set, either during rntm_t initialization or when the rntm_t was previously updated and santized. So rather than locally determining whether to auto- factorize, those functions just read the .auto_factor field and proceed accordingly. - Refactored and removed most code from bli_thread_init_rntm_from_env(). This function now reads the environment variables needed to set nt, jc, pc, ic, jr, and ir; sets them into the global rntm_t; and then calls bli_rntm_sanitize() in order to make sure that the contents are in a "good" state. Thanks to Devin Matthews for suggesting this refactoring. - Redefined bli_rntm_set_num_threads() and bli_rntm_set_ways() such that if multithreading is disabled at compile time (that is, if the cpp macro BLIS_ENABLE_MULTITHREADING is undefined), they ignore the caller's request and instead clear the nt and ways fields. - Redefined bli_thread_set_num_threads() and bli_thread_set_ways() such that if multithreading is disabled at compile time (that is, if the cpp macro BLIS_ENABLE_MULTITHREADING is undefined), they ignore the caller's request and do nothing. - Redefined bli_rntm_set_num_threads() and bli_rntm_set_ways() as true functions rather than static inline functions. - In bli_rntm.c, statically initialize the global_rntm global variable via the BLIS_RNTM_INITIALIZER macro. - In bli_rntm.h, defined bli_rntm_clear_auto_factor(), which sets the .auto_factor field of the rntm_t to FALSE. - Reorganized order of some inline function definitions in bli_rntm.h. - Changed the default value given to the .auto_factor field by the BLIS_RNTM_INITIALIZER macro from TRUE to FALSE. - Call bli_rntm_clear_auto_factor() instead of bli_rntm_set_auto_factor_only() in bli_rntm_init(). - Comment/whitespace updates. commit a1a5a9b4cbef9208da494c45a2f933a8e82559ac Author: Field G. Van Zee Date: Wed Sep 21 18:31:01 2022 -0500 Implemented support for fat multithreading. (#665) Details: - Allow the user to configure BLIS in such a way that multiple threading implementations get compiled into the library, with one of those implementations chosen at runtime. For now, there are only three implementations available: OpenMP, pthreads, and single. (Here, 'single' merely refers to single-threaded mode.) The configure script now allows the user to give the -t option with a comma-separated list of values, such as '-t openmp,pthreads'. The first value in the list will always be the default at library initialization time, and 'single' is always silently appended to the end of the list. The user can specify which implementation should execute in one of three ways: by setting the BLIS_THREAD_IMPL environment variable prior to launch; by calling the bli_thread_set_thread_impl() global runtime API; or by encoding their choice into a rntm_t that is passed into one of the expert interfaces. Any of these three choices overrides the initialization-time default (i.e., the first value listed to the -t configure option). Requesting an implementation that was not compiled into the library will result in an error message followed by bli_abort(). - Relocated the 'auto' logic for the -t option from the top-level Makefile to the configure script. (Currently, this logic is pretty dumb, choosing 'openmp' for gcc and icc, and 'pthreads' for clang.) - Defined a new 'timpl_t' enum in bli_type_defs.h, with three valid values: BLIS_SINGLE, BLIS_OPENMP, BLIS_POSIX. - Reorganized the thrcomm_t struct into a single defintion with two preprocessor blocks, one each for additional fields needed by OpenMP and pthreads. - Added timpl_t argument to bli_thrcomm_bcast(), bli_thrcomm_barrier(), bli_thrcomm_init(), and bli_thrcomm_cleanup(), which these functions need since they are now wrappers that choose the implementation- specific function corresponding to the currently enabled threading implementation. - Added rntm_t* to bli_thread_broadcast(), bli_thread_barrier() so that those functions can pass the timpl_t value into bli_thrcomm_bcast() and bli_thrcomm_barrier(), respectively. - Defined bli_env_get_str() in bli_env.c to allow the querying of BLIS_THREAD_IMPL (which, unlike BLIS_NUM_THREADS and friends, is expected to be a string). - Defined bli_thread_get_thread_impl(), bli_thread_set_thread_impl() to get and set the current threading implementation at runtime. - Defined bli_rntm_thread_impl() and bli_rntm_set_thread_impl() to query and set the threading implementation within a rntm_t. Also choose BLIS_SINGLE as the default value when initializing rntm_t structs. - Added bli_info_get_*() functions to query whether OpenMP or pthreads would be chosen as the default at init-time. Note that this only tests whether OpenMP or pthreads is the first implementation in the list passed to the threading configure option (-t) and is *not* the same as querying which implementation is currently selected, since that can be influenced by BLIS_THREAD_IMPL and/or bli_thread_set_thread_impl(). - Changed l3int_t to l3int_ft. - Updated docs/Multithreading.md to document the new behavior. - Updated sandbox/gemmlike and addon/gemmd to work with the new fat threading feature. This included a few bugfixes to bring the codes up to date, as necessary. - Comment, whitespace updates. commit 89df7b8fa3a3e47ab2fc10ac4d65d0b9fde16942 Author: Devin Matthews Date: Sun Sep 18 18:46:57 2022 -0500 De-templatized _sup_var1n2m.c; unified _sup_packm_a/b(). (#659) Details: - Re-expressed the two variants in frame/3/bli_l3_sup_var1n2m.c as a single function each that performs char* pointer arithmetic rather than four datatype-specific functions. Did the same for the functions in bli_l3_sup_packm_a.c and _sup_packm_b.c, and then unified the two into a single set of functions for packing either A or B, which now resides in bli_l3_sup_packm.c. - Pre-grow the cntl_t tree in both bli_l3_sup_var1n2m.c variants rather than grow them incrementally. - Relocated empty-matrix and scale-by-beta early return handlnig from bli_gemm_front() and bli_gemmt_front() to their _ex() counterparts. - Comment, whitespace updates. commit fb91337eff1ee2098f315a83888f6667b3a56f86 Author: Field G. Van Zee Date: Thu Sep 15 19:08:10 2022 -0500 Fixed a harmless pc_nt bug in 05a811e. Details: - Added missing curly braces around some statements in bli_rntm.c, one of which needed them in order for the relevant code to be executed in the intended way. The consequence of 05a811e omitting those braces was that a statement (pc_nt = 1;) was executed more often than it needed to be. - Also adjusted the analagous code in bli_thread.c to match that of bli_rntm.c. commit e86076bf4461d1a78186fb21ba8320cfb430f62c Author: Field G. Van Zee Date: Thu Sep 15 14:22:59 2022 -0500 Test the 'gemmlike' sandbox via AppVeyor. (#664) Details: - Added a fifth test to our .appveyor.yml that enables the 'gemmlike' sandbox with OpenMP enabled (via clang, the 'auto' configuration target, and building to a static library). Thanks to Jeff Diamond for pointing out that this test would be useful. commit 63177dca48cb7d066576d884da4a7a599ececebf Author: Field G. Van Zee Date: Thu Sep 15 11:21:26 2022 -0500 Fixed gemmlike sandbox bug introduced in 7c07b47. Details: - Fixed a bug in the 'gemmlike' sandbox that was introduced in 7c07b47. This bug was the result of the fact that the gemmlike implementation uses bli_thrinfo_sup_grow() to grow its thrinfo_t tree, but the aforementioned commit added an optimization that kicks in when the rntm_t .pack_a and .pack_b fields are both FALSE. Those fields were originally added only for sup execution; for large code path, they are intended to be ignored. But the default initial state of a rntm_t has those fields set to FALSE, which was inadvertantly activating the optimization (which targeted single-threaded cases only) and would cause multithreaded use cases of 'gemmlike' to segfault. The fix took the form of setting the .pack_a and .pack_b fields to TRUE in bls_gemm_ex(). - Added minimal 'const' and 'const'-casting to 'gemmlike' so that gcc stays quiet. commit 05a811e898b371a76581abd4afa416980cce7db9 Author: Field G. Van Zee Date: Tue Sep 13 19:24:05 2022 -0500 Initialize rntm_t nt/ways fields with 1 (not -1). (#663) Details: - Changed the way that rntm_t structs are initialized, mainly so that the global rntm_t that is set via environment variables at runtime may be queried by the application prior to any computation taking place. (Strictly speaking, the application may already query these fields, but they do not always contain valid values and often contain -1 when they are unset.) These changes also served to clarify how these parameters are treated, and homogenized the implementations of bli_rntm_set_ways_from_rntm(), bli_rntm_set_ways_from_rntm_sup(), and bli_thread_init_rntm_from_env(). Special thanks to Jeff Diamond, Leick Robinson, and Devin Matthews for pointing out that the previous behavior was needlessly confusing and could be improved. - The aforementioned modifications also included subtle changes as to what counts as "setting" a loop's ways of parallelism for the purposes of deciding whether to use the ways or the total number of threads. Previously, setting any loop's ways, even to 1, counted in favor of using the ways. Now, only values greater than 1 will count as "setting", and all other values will silently be mapped to 1, with those parameters treated as if they were untouched all along. - Updated bli_rntm.h and bli_thread.c so that any attempt to set the PC_NT variable (or pc_nt field of a rntm_t) will either ignore the request or reassert the value as 1. - Updated bli_rntm_set_ways() so that rather than clear the num_threads field, it is set to the product of all of the per-loop ways of parallelism. - Removed code from test_libblis.c that handled the possibility of unset environment variables when printing out their values. - Removed bli_rntm_equals() inline function from bli_rntm.h, which has long been disabled. - Updates to docs/Multithreading.md related to the aforementioned changes. - Comment updates. commit fd885cf98f4fe1d3bc46468e567776c37c670fcc Author: Field G. Van Zee Date: Tue Sep 13 11:50:23 2022 -0500 Use kernel CFLAGS for 'kernels' subdirs in addons. (#658) Details: - Updated Makefile and common.mk so that the targeted configuration's kernel CFLAGS are applied to source files that are found in a 'kernels' subdirectory within an enabled addon. For now, this behavior only applies when the 'kernels' directory is at the top level of the addon directory structure. For example, if there is an addon named 'foobar', the source code must be located in addon/foobar/kernels/ in order for it to be compiled with the target configurations's kernel CFLAGS. Any other source code within addon/foobar/ will be compiled with general-purpose CFLAGS (the same ones that were used on all addon code prior to this commit). Thanks to AMD (esp. Mithun Mohan) for suggesting this change and catching an intermediate bug in the PR. - Comment/whitespace updates. commit cb74202db39dc8cb81fdd06f8a445f8837e27853 Author: Field G. Van Zee Date: Tue Sep 13 11:46:24 2022 -0500 Fixed incorrect sizeof(type) in edge case macros. (#662) Details: - In bli_edge_case_macro_defs.h, the GEMM_UKR_SETUP_CT_PRE() and GEMMTRSM_UKR_SETUP_CT_PRE() macros previously declared their temporary ct microtiles as: PASTEMAC(ch,ctype) _ct[ BLIS_STACK_BUF_MAX_SIZE / sizeof( PASTEMAC(ch,type) ) ] \ __attribute__((aligned(alignment))); \ The problem here is that sizeof( PASTEMAC(ch,type) ) evaluates to things like sizeof( BLIS_DOUBLE ), not sizeof( double ), and since BLIS_DOUBLE is an enum, it is typically an int, which means the sizeof() expression is evaluating to the wrong value. This was likely a benign bug, though, since BLIS does not support any computational datatypes that are smaller than sizeof( int ), which means the ct array would be *over*-allocated rather than underallocated. Thanks to @moon-chilled for identifying and reporting this bug in #624. - CREDITS file update. commit 6e5431e8494b06bd80efcab3abf0a6456d6c0381 Author: Devin Matthews Date: Sat Sep 10 15:16:58 2022 -0500 Fix line number issue in flattened blis.h. (#660) Details: - Updated the top-level Makefile so that it invokes flatten-headers.py without the -c option, which was requesting that comments be stripped (since comment stripping is disabled by default). - Updated flatten-headers.py to accept a new option (-l) to enable insertion of #line directives into the output file. This new option is enabled by default. - Also added logic to flatten-headers.py that outputs a warning if both comment stripping and line numbers are requested since the comment stripping will cause the line numbers to become inaccurate. commit 4afe0cfdab0e069e027f97920ea604249e34df47 Author: Field G. Van Zee Date: Thu Sep 8 18:33:20 2022 -0500 Defined invscalv, invscalm, invscald operations. (#661) Details: - Defined invert-scale (invscal) operation on vectors (level-1v), matrices (level-1m), and diagonals (level-1d). - Added test modules for invscalv and invscalm to the testsuite. - Updated BLISObjectAPI.md and BLISTypedAPI.md API documentation to reflect the new operations. Also updated KernelsHowTo.md accordingly. - Renamed 'beta' to 'alpha' in scalv and scalm testsuite modules (and input.operations files) so that the parameter name matches the parameter used in the documentation. commit a87eae2b11408b556e562f1b04e673c6cd1612bc Author: Field G. Van Zee Date: Tue Sep 6 18:04:09 2022 -0500 Added '-q' quiet mode option to testsuite. (#657) Details: - Added support for a '-q' command line option to the testsuite. This option suppresses most informational output that would normally clutter up the screen. By default, verbose mode (the previous status quo) will be operative, and so quiet mode must be requested. commit dfa54139664a42d29774e140ec9e5597af869a76 Author: RuQing Xu Date: Tue Aug 30 08:07:50 2022 +0800 Arm64 dgemmsup with extended MR&NR (#655) Details: - Since the number of registers in NEON is large but their lengths are short, I'm here extending both MR and NR. - The approach is to represent the C microtile in registers optionally in columns, so for sizes like 6x7m, the 'crr' kernel is the default with 'rrr' supported through an in-register transpose. - A few asm kernels are crafted for 'rv' to complete this extended size support. - For 'rd' I'm still relying heavily on C99 intrinsic kernels with branching so the performance might not be optimal. (Sorry for that.) - So far, these changes only affect the 'firestorm' subconfig. - This commit also contains row-preferential s12x8 and d6x8 gemm ukernels. These microkernels are templatized versions of the existing s8x12 and d6x8 ukernels defined in bli_gemm_armv8a_asm_d6x8.c. commit 9e5594ad5fc41df8ef2825a025d7844ac2275c27 Author: Field G. Van Zee Date: Thu Aug 11 14:36:38 2022 -0500 Temporarily disabled #line directives from 6826c1c. Details: - Commented out the inclusion of #line preprocessor directives in the flattened header output provided by build/flatten-headers.py. This output was added recently in 6826c1c, but was later found to have thrown off the line numbering referenced by compiler warnings and errors (possibly due to license comment blocks, which are stripped from source headers as they are inlined into the monolithic header). commit 775148bcdbb1014b4881a76306f35f5d0fedecbe Author: jdiamondGitHub Date: Fri Aug 5 12:01:24 2022 -0500 Updated ARMv8a kernels to fix 2 prefetching issues. (#649) Details: - The ARMv8a dgemm/sgemm microkernels had 2 prefetching issues that impacted performance on modern ARM platforms. The most significant issue was that only a single prefetch per C tile column was issued. When a column of C was not cache aligned, the second cache line would not be prefetched at all, forcing the kernel to wait for an entire load to update elements of C. This happened with roughly 50% of the C prefetches. The fix was to have two prefetches per column, spaced 64 bytes (1 cache line) apart. - A secondary performance issue was that all the C prefetch instructions were issued sequentially at the beginning of the kernel call. This caused a noticeable performance slowdown. Interleaving the prefetch calls every 2-3 instructions in the prologue code solved the issue. commit bbaf29abd942de47a3a99a80a67d12bab41b27db Author: Field G. Van Zee Date: Thu Aug 4 17:51:37 2022 -0500 Very minor variable updates to common.mk. Details: - Fixed a harmless bug that would have allowed C++ headers into the list of header suffices specifically reserved for C99 headers. In practice, this would have had no substantive effect on anything since the core BLIS framework does not use C++ headers. commit a48e29d799091a833213efeafaf2d342ebdafde9 Author: Field G. Van Zee Date: Thu Jul 28 10:11:07 2022 -0500 CREDITS file update. Details: - Thanks to Kihiro Bando for assisting with issue #644. commit 5b298935de7f20462bfad1893ed34ecd691cec5a Author: Field G. Van Zee Date: Wed Jul 27 19:14:15 2022 -0500 Removed buggy cruft from power10 subconfig. Details: - Removed #defines for BLIS_BBN_s and BLIS_BBN_d from bli_kernel_defs_power10.h. These were inadvertently set in ae10d949 because the power10 subconfig was registering bb packm ukernels, but only for 6xk (power10 uses s8x16 and d8x8 ukernels) and only because the original author (probably) copy-pasted from power9 when getting started. That 6xk packm registration was effectively "dead code" prior to ae10d949, but was then mistaken as not-dead code during the ae10d949 refactor. These improper bb factors may have been causing bugs in power10 builds. Thanks to Nicholai Tukanov for helping remind me what the power10 subconfig was supposed to look like. - Removed extraneous microkernel preference registrations from power10 subconfig. Preferences for single and double complex gemm were being registered despite there being no complex gemm ukernels registered to go with them. Similarly, there were trsm preferences registered without any trsm ukernels registered (and BLIS doesn't actually use a preference for the trsm ukernel anyway). These extraneous registrations were almost surely not hurting anything, even if they were quite misleading. commit 56de31b00fa0f1ba866321817cd1e5d83000ff11 Author: Devin Matthews Date: Wed Jul 27 13:54:17 2022 -0500 Disable modification of KC in the gemmsup kernels. (#648) This led to a ~50% performance reduction for certain gemm operations (but not others?). See #644 for example. commit 4dde947e2ec9e139c162801320c94e6a01a39708 Author: Field G. Van Zee Date: Tue Jul 26 17:29:32 2022 -0500 Fixed out-of-bounds bug in sup s6x16m haswell kernel. Details: - Fixed another out-of-bounds read access bug in the haswell sup assembly kernels. This bug is similar to the one fixed in 17b0caa and affects bli_sgemmsup_rv_haswell_asm_6x2m(). Thanks to Madeesh Kannan for reporting this bug (and a suitable fix) in #635. - CREDITS file update. commit 6826c1cdfba855513786d9e3d606681316453398 Author: Devin Matthews Date: Mon Jul 25 18:21:05 2022 -0500 Add `#line` directives to flattened `blis.h`. (#643) Details: - Modified flatten-headers.py so that #line directives are inserted into the flattened blis.h file. This facilitates easier debugging when something is amiss in the flattened blis.h because the compiler will be able to refer to the line number within the original constituent header file (which is where the fix would go) rather than the line number within the flattened header (which is not as helpful). commit af3a41e02534befdae026377592ce437bab83023 Author: Alexander Grund Date: Thu Jul 21 18:05:48 2022 +0200 Add autodetection for POWER7, POWER9 & POWER10 (#647) Read from `/proc/cpuinfo` as done for ARM. Fixes #501 commit 17b0caa2b2bff439feb6d2b39cfa16e7591882b0 Author: Field G. Van Zee Date: Thu Jul 14 17:55:34 2022 -0500 Fixed out-of-bounds read in haswell gemmsup kernels. Details: - Fixed memory access bugs in the bli_sgemmsup_rv_haswell_asm_Mx2() kernels, where M = {1,2,3,4,5,6}. The bugs were caused by loading four single-precision elements of C, via instructions such as: vfmadd231ps(mem(rcx, 0*32), xmm3, xmm4) in situations where only two elements are guaranteed to exist. (These bugs may not have manifested in earlier tests due to the leading dimension alignment that BLIS employs by default.) The issue was fixed by replacing lines like the one above with: vmovsd(mem(rcx), xmm0) vfmadd231ps(xmm0, xmm3, xmm4) Thus, we use vmovsd to explicitly load only two elements of C into registers, and then operate on those values using register addressing. Thanks to Daniël de Kok for reporting these bugs in #635, and to Bhaskar Nallani for proposing the fix). - CREDITS file update. commit cc260fd7068f0fe449d818435aa11adb14c17fed Author: Field G. Van Zee Date: Wed Jul 13 16:16:01 2022 -0500 Allow uniform max problem sizes in test/3/runme.sh. Details: - Tweaked test/3/runme.sh so that the test driver binaries for single- threaded (st), single-socket (1s), and dual-socket (2s) execution can be built using identical problem size ranges. Previously, this was not possible because runme.sh used the maximum problem size, which was embedded into the binary filename, to tell the three classes of binaries apart from one another. Now, runme.sh uses the binary suffix ("st", "1s", or "2s") to tell them apart. This required only a few changes to the logic, but it also required a change in format to the threading config strings themselves (replacing the max problem size with "st", "1s", or "2s"). Thanks to Jeff Diamond for inspiring this improvement. - Comment updates. commit 9b1beec60be31c6ea20b85806d61551497b699e4 Author: bartoldeman Date: Mon Jul 11 20:15:12 2022 -0400 Use BLIS_ENABLE_COMPLEX_RETURN_INTEL in blastest files (#636) Details: - Fixed a crash that occurs when either cblat1 or zblat1 are linked with a build of BLIS that was compiled with '--complex-return=intel'. This fix involved inserting preprocessor macro guards based on BLIS_ENABLE_COMPLEX_RETURN_INTEL into blastest/src/cblat1.c and blastest/src/zblat1.c to correctly handle situations where BLIS is compiled with Intel/f2c-style calling conventions for complex numbers. - Updated blastest/src/fortran/run-f2c.sh so that future executions will insert the aforementioned cpp macro conditional where appropriate. commit 98d467891b74021ace7f248cb0856bec734e39b6 Author: bartoldeman Date: Mon Jul 11 19:40:53 2022 -0400 Change complex_return='intel' for ifx. (#637) Details: - When checking the version string of the Fortran compiler for the purposes of determining a default return convention for complex domain values, grep for "IFORT" instead of "ifort" since that string is common to both the 'ifx' and 'ifort' binaries provided by Intel: $ ifx --version ifx (IFORT) 2022.1.0 20220316 Copyright (C) 1985-2022 Intel Corporation. All rights reserved. $ ifort --version ifort (IFORT) 2021.6.0 20220226 Copyright (C) 1985-2022 Intel Corporation. All rights reserved. commit ffde54cc5c334aca8eff4d6072ba49496bf3104c Author: jdiamondGitHub Date: Mon Jul 11 16:47:30 2022 -0500 Minor changes to .gitignore and LICENSE files. (#642) Details: - Macs create .DS_Store files in every directory visited. Updated .gitignore file so these files won't be reported as untracked by 'git status'. - Added Oracle Corporation to the LICENSE file. - Updated UT copyright on behalf of SHPC. commit 7cba7ce3dd1533fcc4ca96ac902bdf218686139a Author: Field G. Van Zee Date: Fri Jul 8 11:15:18 2022 -0500 Minor cleanups, comment updates to bli_gks.c. Details: - Removed a redundant registration of 'a64fx' subconfig in bli_gks_init(). - Reordered registration of 'armsve', 'a64fx', and 'firestorm' subconfigs. Thanks to Jeff Diamond for his input on this reordering. - Comment updates to bli_gks.c and arch_t enum in bli_type_defs.h. commit 667f201b7871da68622027d02bd6b7da3262f8e8 Author: Field G. Van Zee Date: Thu Jul 7 16:44:21 2022 -0500 Fixed type bug in bli_cntx_set_ukr_prefs(). Details: - Fixed a bug in bli_cntx_set_ukr_prefs() which erroneously typecast the num_t value read from va_args() down to a bool before being stored within the cntx_t. This bug was introduced on April 6th 2022, in ae10d94. This caused the ukernel preferences for double real and double complex to go unchanged while the preferences for single real and single complex were corrupted by the former datatypes' preference values. The bug manifested as degraded performance for subconfigurations that registered column-preferential ukernels. The reason is that the erroneous preferences trigger unnecessary transpositions in the operation, which forces the gemm ukernel to compute on matrices that are not stored according to its preference. Thanks to Devin Matthews, Jeff Diamond, and Leick Robinson for their extensive efforts and assistance in tracking down this issue. - Augmented the informational header that is output by the testsuite to include ukernel preferences for gemm, gemmtrsm_[lu], and trsm_[lu]. - CREDITS file update. commit d429b6bfced21a63bf711224ac402f93f0080b52 Author: Isuru Fernando Date: Tue Jun 28 15:34:10 2022 -0500 Support clang targetting MinGW (#639) * Support clang targetting MinGW * Fix pthread linking commit d93df023348144e091f7b3e3053995648f348aa7 Author: Field G. Van Zee Date: Wed Jun 15 14:09:49 2022 -0500 Removed unused dt arg in bli_gks_query_ind_cntx(). Details: - Removed the num_t datatype argument from bli_gks_query_ind_cntx(). This argument stopped being needed by the function in commit e9da642. Its only use in bli_gks_query_ind_cntx() was to be passed through to the context initialization function for the chosen induced method, but even then, commit log notes from e9da642 indicate that I could not recall why the datatype argument was ever needed by the context init function to begin with. - Updated all invocations of bli_gks_query_ind_cntx() to omit the dt argument. Most of these invocations resided in various standalone test drivers (and the testsuite). commit 56772892450cc92b3fbd6a9d0460153a43fc47ab Author: Field G. Van Zee Date: Wed Jun 1 10:49:33 2022 -0500 Added SMU citation to README.md intro. Details: - Added a citation to SMU and the Matthews Research Group to the general attribution of maintainership and development in the Introduction of the README.md file. Thanks to Robert van de Geijn and Devin Matthews for suggesting this change. commit 4603324eb090dfceaad3693a70b2d60544036aa8 Author: Field G. Van Zee Date: Thu May 19 14:07:03 2022 -0500 Init/finalize via bli_pthread_switch_t API (#634). Details: - Defined and implemented a new pthread-like abstract datatype and API in bli_pthread.c. The new type, bli_pthread_switch_t, is similar to bli_pthread_once_t in some respects. The idea is that like a switch in your home that controls a light or ceiling fan, it can either be on or off. The switch starts in the off state. Moving from one state to the other (on to off; off to on) causes some action (i.e., a startup or shutdown function) to be executed. Trying to move from one state to the same state (on to on; off to off) is safe in that it results in no action. Unlike bli_pthread_once(), the API for bli_pthread_switch_t contains both _on() and _off() interfaces. Also, unlike the _once() function, the _on() and _off() functions return error codes so that the 'int' error code returned from the startup or shutdown functions may be passed back to the caller. Thanks to Devin Matthews for his input and feedback on this feature. - Replaced the previous implementation of bli_init_once() and bli_finalize_once() -- both of which used bli_pthread_once() -- with ones that rely upon bli_pthread_switch_on() and _switch_off(), respectively. This also required updating the return types of _init_apis() and _finalize_apis() to match the function pointer type required by bli_pthread_switch_on()/_switch_off(). - Comment updates. commit 64a9b061f6032e2b59613aecdbe7bb52161605c1 Author: Field G. Van Zee Date: Tue May 10 14:54:22 2022 -0500 Fixed misspelling of 'xpbys' in gemm macrokernel. Details: - Fixed a functionally harmless typo in bli_gemm_ker_var2.c where a few instances of the substring "xpbys" were misspelled as "xbpys". The misspellings were harmless because they were consistent, and because they referenced only local symbols. commit 1c733402a95ab08b20f3332c2397fd52a2627cf6 Author: Jed Brown Date: Thu Apr 28 11:58:44 2022 -0600 Fix version check for znver3, which needs gcc >= 10.3 (#628) Apple's clang-12 lacks znver3 support, unlike upstream clang-12. commit 6431c9e13b86e4442b6aacba18a0ace12288c955 Author: Field G. Van Zee Date: Thu Apr 14 13:01:24 2022 -0500 Added missing 'const' to zen bli_gemm_small.c. Details: - Added missing 'const' qualifiers to signatures of functions defined in kernels/zen/3/bli_gemm_small.c. This fixes compile-time errors when targeting 'zen3' subconfig (which apparently is enabling AMD's gemm_small code path by default). Thanks to Devin Matthews for reporting this error. commit 9fea633748ed27ef3853bba7cd955690c61092b4 Author: Devin Matthews Date: Wed Apr 13 15:59:06 2022 -0500 Partial addition of 'const' to all interfaces above the (micro)kernels. (#625) Details: - Added 'const' qualifier to applicable function arguments wherever the the pointed-to object is not internally modified. This change affects all interfaces that reside above the level of the (micro)kernels. - Typecast certain function return values to discard 'const' qualifier. - Removed 'restrict' from various arguments, including cntx_t*, auxinfo_t*, rntm_t*, thrinfo_t*, mem_t*, and others - Removed parts of some APIs, such as bli_cntx_*(), due to limited use. - Merged some variable declarations with their corresponding initialization statements. - Whitespace changes. commit ae10d9495486f589ed0320f0151b2d195574f1cf Author: Devin Matthews Date: Wed Apr 6 20:31:11 2022 -0500 Simplify and rewrite reference packm kernels. (#610) Details: - Reorganized the way kernels are stored within the cntx_t structure so that rather than having a function pointer for every supported size of unrolled packm kernel (2xk, 3xk, 4xk, etc.), we store only two packm kernels per datatype: one to pack MRxk micropanels and one to pack NRxk micropanels. - NOTE: The "bb" (broadcast B) reference kernels have been merged into the "standard" kernels (packm [including 1er and unpackm], gemm, trsm, gemmtrsm). This replication factor is controlled by BLIS_BB[MN]_[sdcz] etc. Power9/10 needs testing since only a replication factor of 1 has been tested. armsve also needs testing since the MR value isn't available as a macro. - Simplified the bli_cntx_*() APIs to conform to the new unified kernel array within the cntx_t. Updated existing bli_cntx_init_() function definitions for all subconfigurations. - Consolidated all kernel id types (e.g. l1vkr_t, l1mkr_t, l3ukr_t, etc.) into one kernel id type: ukr_t. - Various edits, updates, and rewrites of reference kernels pursuant to the aforementioned changes. - Define compile-time macro constants (BLIS_MR_[sdcz], BLIS_NR_[sdcz], and friends) in bli_kernel_macro_defs.h, but only when the macro BLIS_IN_REF_KERNEL is defined by the build system. - Loose ends: - Still need to update documentation, including: - docs/ConfigurationHowTo.md - docs/KernelsHowTo.md to reflect changes made in this commit. commit b3e674db3c05ca586b159a71deb1b61d701ae5c9 Author: Field G. Van Zee Date: Mon Apr 4 17:31:02 2022 -0500 README.md update to link to releases page. commit 69fa915464c52f09a5971a60f521900d31a34e69 Author: Field G. Van Zee Date: Fri Apr 1 08:47:46 2022 -0500 Fixed broken "tagged releases" link in README.md. commit 88cab8383ca90ddbb4cf13e69b7d44a1663a4425 Author: Field G. Van Zee Date: Fri Apr 1 08:12:06 2022 -0500 CHANGELOG update (0.9.0) commit 14c86f66b20901b60ee276da355c1b62642c18d2 Author: Field G. Van Zee Date: Fri Apr 1 08:12:06 2022 -0500 Version file update (0.9.0) commit 99bb9002f1aff598d347eae2821a3f7bdd1f48e8 Author: Field G. Van Zee Date: Fri Apr 1 08:10:59 2022 -0500 ReleaseNotes.md update in advance of next version. commit bee7678b2558a691ac850819dbe33fefe4fdbee3 Author: Field G. Van Zee Date: Thu Mar 31 14:09:39 2022 -0500 CREDITS file update. commit cf06364327bd2d21d606392371ff3c5962bee5ba Author: Field G. Van Zee Date: Tue Mar 29 16:18:25 2022 -0500 Fixed typo in BLAS gemm3m call to _check(). Details: - Fixed an unresolved symbol issue leftover from #590 whereby ?gemm3m_() as defined in bla_gemm3m.c was referencing bla_gemm3m_check(), which does not exist. It should have simply called the _check() function for gemm. commit 1ec020b33ece1681c0041e2549eed2bd4c6cf356 Author: Dipal M Zambare <71366780+dzambare@users.noreply.github.com> Date: Wed Mar 30 02:45:36 2022 +0530 AMD kernel updates; frame-specific AMD updates. (#597) Details: - Allow building BLIS with certain framework files (each with the '_amd' suffix) that have been customized by AMD for Zen-based hardware. These customized files were derived from portable versions of the same files (i.e., those without the '_amd' suffix). Whether the portable or AMD- specific files are compiled is now controlled by a new configure option, --[en|dis]able-amd-frame-tweaks. This option is disabled by default in vanilla BLIS, though AMD may choose to enable it by default in their fork. For now, the added AMD-specific files are: - bli_gemv_unf_var2_amd.c - bla_copy_amd.c - bla_gemv_amd.c These files reside in 'amd' subdirectories found within the directory housing their generic counterparts. - Register optimized real-domain copyv, setv, and swapv kernels in bli_cntx_init_zen.c. - Various minor updates to level-1v kernels in 'zen' kernel set. - Added caxpyf kernel as well as saxpyf and multiple daxpyf kernels to the 'zen' kernel set - If the problem passed to ?gemm_() in bla_gemm.c has a unit m or n dim, call gemv instead and return early. - Combined variable declarations with their initialization in various level-2 and level-3 BLAS compatibility files, and also inserted 'const' qualifer in those same declaration statements. - Moved frame/compat/bla_gemmt.c and .h to frame/compat/extra/ . - Added copyv and swapv test drivers to 'test' directory. - Whitespace, comment changes. commit 0db2bd5341c5c3ed5f1cc2bffa90952735efa45f Author: Bhaskar Nallani Date: Fri Mar 25 05:11:55 2022 +0530 Added BLAS/CBLAS APIs for gemm3m. (#590) Details: - Created ?gemm3m_() and cblas_?gemm3m() APIs that (for now) simply invoke the 1m implementation unconditionally. (Note that these APIs bypass sup handling.) - Added BLAS prototypes for gemm3m in frame/compat/bla_gemm3m.h. - Added CBLAS prototypes for gemm3m in frame/compat/cblas/src/cblas.h. - Relocated: frame/compat/cblas/src/cblas_?gemmt.c files into frame/compat/cblas/src/extra/ - Relocated frame/compat/bla_gemmt.? into frame/compat/extra/ . - Minor reorganization of prototypes and cpp macro directives in bli_blas.h, cblas.h, and cblas_f77.h. - Trival whitespace change to cblas_zgemm.c. commit d6810000e961fe807dc5a7db81180a8355f3eac0 Author: Devin Matthews Date: Mon Mar 14 10:29:54 2022 -0500 Update Multithreading.md Add notes about `BLIS_IR_NT` (should typically be 1) and `BLIS_JR_NT` (should typically be small, e.g. <= 4). [ci skip] commit f1dbb0e514f53a3240d3a6cbdc3306b01a2206f5 Author: Field G. Van Zee Date: Fri Mar 11 13:38:28 2022 -0600 Trival whitespace change; commit log addendum. Details: - A co-attribution to Mithun Mohan was inadvertently omitted from the commit log for headline change in the previous commit, 7c07b47. commit 7c07b477e432adbbce5812ed9341ba3092b03976 Author: Field G. Van Zee Date: Fri Mar 11 13:28:50 2022 -0600 Avoid gemmsup barriers when not packing A or B. (#622) Details: - Implemented a multithreaded optimization for the special (and common) case of employing the gemmsup code path when the user requests (implicitly or explicitly) that neither A nor B be packed during computation. This optimization takes the form of a greatly reduced code branch in bli_thrinfo_sup_create_for_cntl(), which avoids a broadcast and two barriers, and results in higher performance when obtaining two-way or higher parallelism within BLIS. Thanks to Bhaskar Nallani of AMD for proposing this change via issue #605. - Added an early return branch to bli_thrinfo_create_for_cntl() that detects and quickly handles cases where no parallelism is being obtained within BLIS (i.e., single-threaded execution). Note that this special case handling was/is already present in bli_thrinfo_sup_create_for_cntl(). - CREDITS file update. commit cad10410b2305bc0e328c5f2517ab02593b53428 Author: Ivan Korostelev Date: Thu Mar 10 09:58:14 2022 -0600 POWER10: edge cases in microkernel (#620) Use new API for POWER10 gemm microkernel commit 71851a0549276b17db18a0a0c8ab4f54493bf033 Author: Field G. Van Zee Date: Tue Mar 8 17:38:09 2022 -0600 Fixed level-3 performance bug in haswell ukernels. Details: - Fixed a performance regression affecting nearly all level-3 operations that use the 'haswell' sgemm and dgemm microkernels. This regression was introduced in 54fa28b, caused by an ill-formed conditional expression in the assembly code that controls whether cache lines of C should be prefetched as rows or as columns. Essentially, the two branches were reversed, causing incomplete prefetching to occur for both row- and column-stored instances of matrix C. Thanks to Devin Matthews for his help finding and fixing this bug. commit 84732bf95634ac606c5f2661d9474318e366c386 Author: Field G. Van Zee Date: Mon Feb 28 12:19:31 2022 -0600 Revamp how tools are handled/checked by configure. Details: - Consolidate handling of tools that are specifiable via CC, CXX, FC, PYTHON, AR, and RANLIB into one bash function, select_tool_w_env(). - If the user specifies a tool via an environment variable (e.g. CC=gcc) and that tool does not seem valid, print an error message and abort configure, unless the tool is optional (e.g. CXX or FC), in which case a warning message is printed instead. - The definition of "seems valid" above amounts to: - responding to at least one of a basic set of command line options (e.g. --version, -V, -h) if the os_name is Linux (since GNU tools tend to respond to flags such as --version) or if the tool in question is CC, CXX, FC, or PYTHON (which tend to respond to the expected flags regardless of OS) - the binary merely existing for AR and RANLIB on Darwin/OSX/BSD. (These OSes tend to have non-GNU versions of ar and ranlib, which typically do not respond to --version and friends.) - This PR addresses #584. Thanks to Devin Matthews for suggesting some of the changes in this commit. commit d5146582b1f1bcdccefe23925d3b114d40cd7e31 Author: RuQing Xu Date: Wed Feb 23 03:35:46 2022 +0900 ArmSVE Ensure Non-zero Block Size (#615) Fixes #613. There are several macros/environment variables which need to be tuned to get good cache block sizes. It would be nice to have a way of getting values automatically. commit 4d8352309784403ed6719528968531ffb4483947 Author: RuQing Xu Date: Wed Feb 23 01:03:47 2022 +0900 Add armsve to arm64 Metaconfig (#614) Availability of the `armsve` subconfig is controlled by the compiler version (gcc/clang). Tested for SVE and non-SVE. Fixes #612. commit c9700f369aa84fc00f36c4b817ffb7dab72b865d Author: Field G. Van Zee Date: Tue Feb 15 15:36:52 2022 -0600 Renamed SIMD-related macro constants for clarity. Details: - Renamed the following macros defined in bli_kernel_macro_defs.h: BLIS_SIMD_NUM_REGISTERS -> BLIS_SIMD_MAX_NUM_REGISTERS BLIS_SIMD_SIZE -> BLIS_SIMD_MAX_SIZE Also updated all instances of these macros elsewhere, including subconfigurations, source code, and documentation. Thanks to Devin Matthews for suggesting this change. commit ee9ff988c49f16696679d4c6cd3dcfcac7295be7 Author: Field G. Van Zee Date: Tue Feb 15 15:01:51 2022 -0600 Move edge cases to gemmtrsm ukrs; doc updates. Details: - Moved edge-case handling into the gemmtrsm microkernel. This required changing the microkernel API to take m and n dimension parameters as well as updating all existing gemmtrsm microkernel function pointer types, function signatures, and related definitions to take m and n dimensions. Also updated all existing gemmtrsm kernels in the 'kernels' directory (which for now is limited to haswell and penryn kernel sets, plus native and 1m-based reference kernels in 'ref_kernels') to take m and n dimensions, and implemented edge-case handling within those microkernels via a collection of new C preprocessor macros defined within bli_edge_case_macro_defs.h. Note that the edge-case handling for gemm-like operations had already been relocated into the gemm microkernel in 54fa28b. - Added desriptive comments to GEMM_UKR_SETUP_CT() and related macros in bli_edge_case_macro_defs.h to allow for easier reading. - Updated docs/KernelsHowTo.md to reflect above changes. Also cleaned up the bullet under "Implementation Notes for gemm" that covers alignment issues. (Thanks to Ivan Korostelev for pointing out the confusing and outdated language in issue #591.) - Other minor tweaks to KernelsHowTo.md. commit 25061593460767221e1066f9d720fa6676bbed8f Author: Devin Matthews Date: Sun Feb 13 20:11:55 2022 -0600 Don't use `-Wl,-flat-namespace`. Flat namespaces can cause problems due to conflicting system libraries, etc., so just mark `xerbla_` as a weak symbol on macOS instead. commit 5a4d3f5208d3d8cc1827f8cc90414c764b7ebab3 Author: Devin Matthews Date: Sun Feb 13 17:28:30 2022 -0600 Use -flat_namespace option to link on macOS Fixes #611. commit 26742910a087947780a089360e2baf82ea109e01 Author: Devin Matthews Date: Sun Feb 13 16:53:45 2022 -0600 Update CC_VENDOR logic Look for `GCC` in addition to `gcc` to handle weird conda version strings. [ci skip] commit 2f3872e01d51545c687ae2c8b2650e00552111a7 Author: RuQing Xu Date: Mon Feb 7 17:14:49 2022 +0900 ArmSVE Adopts Label Wrapper For clang (& armclang?) compilation. Hopefully solves #609 . commit 72089bb2917b78d99cf4f27c69125bf213ee54e6 Author: RuQing Xu Date: Sat Feb 5 16:56:04 2022 +0900 ArmSVE Use Predicate in M-Direction No need to query MR during kernel runtime. commit 9cc897f37455d52fbba752e3801f1a9d4a5bfdc1 Author: Ruqing Xu Date: Thu Feb 3 16:40:02 2022 +0000 Fix SVE Compil. commit b5df1811f1bc8212b2cda6bb97b79819afe236a8 Author: RuQing Xu Date: Thu Feb 3 02:31:29 2022 +0900 Armv8a, ArmSVE: Simplify Gen-C commit 35195bb5cea5d99eb3eaf41e3815137d14ceb52d Author: Devin Matthews Date: Mon Jan 31 10:29:50 2022 -0600 Add armclang detection to configure. armclang is treated as regular clang. Fixes #606. [ci skip] commit 0be9282cdccf73342d8571d3f7971a9b0af72363 Author: Field G. Van Zee Date: Wed Jan 26 17:46:24 2022 -0600 Updated zen3 macro constant names. Details: - In config/zen3/bli_family_zen3.h, renamed: BLIS_SMALL_MATRIX_A_THRES_M_GEMMT -> _M_SYRK BLIS_SMALL_MATRIX_A_THRES_N_GEMMT -> _N_SYRK Thanks to Jeff Diamond for helping spot the stale _SYRK naming. commit 0ab20c0e72402ba0b17fe2c3ed3e16bf2ace0fd3 Author: Jeff Hammond Date: Thu Jan 13 07:29:56 2022 -0800 the Apple local label thing is required by Clang in general @egaudry and I both saw this issue on Linux with Clang 10. ``` Compiling obj/thunderx2/kernels/armv8a/3/sup/bli_gemmsup_rv_armv8a_asm_d4x8m.o ('thunderx2' CFLAGS for kernels) kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c:171:49: fatal error: invalid symbol redefinition " \n\t" ^ :90:5: note: instantiated into assembly here .SLOOPKITER: ^ 1 error generated. ``` Signed-off-by: Jeff Hammond commit 81f93be0561c705ae6823d19e40849facc40bef7 Author: Devin Matthews Date: Mon Jan 10 10:19:47 2022 -0600 Fix row-/column-major pref. in 16x8 haswell sgemm ukr (unused) commit 268ce1f29a717d18304713ecc25a2eafe41838c7 Author: Devin Matthews Date: Mon Jan 10 10:17:17 2022 -0600 Relax alignment constraints Remove alignment of temporary AB buffer in edge case handling macros unless alignment is specifically requested (e.g. Core2, SDB/IVB). Fixes #595. commit 3f2440b0226d5e23a43d12105d74aa917cd6c610 Author: Field G. Van Zee Date: Thu Jan 6 14:57:36 2022 -0600 Added m, n dims to gemmd/gemmlike ukernel calls. Details: - Updated the gemmd addon and the gemmlike sandbox code to use the new microkernel calling sequence, which now includes m and n dimensions so that the microkernel has all the information necessary to handle edge cases. Thanks to Jeff Diamond for catching this, which ideally would have been included in commit 54fa28b. - Retired var2 of both gemmd and gemmlike to 'attic' directories and removed their corresponding prototypes. In both cases, var2 was a variant of the block-panel algorithm where edge-case handling was abstracted away to a microkernel wrapper. (Since this is now the official behavior of BLIS microkernels, I saw no need to have it included as a separate code path.) - Comment updates. commit 864bfab4486ac910ef9a366e9ade4b45a39747fc Author: Field G. Van Zee Date: Tue Jan 4 15:10:34 2022 -0600 CREDITS file update. commit 466b68a3ad118342dc49a8130b7b02f5e7748521 Author: Devin Matthews Date: Sun Jan 2 14:59:41 2022 -0600 Add unique tag to branch labels for Apple ARM64. Add `%=` tag to branch labels, which expands to a unique identifier for each inline assembly block. This prevents duplicate symbol errors on Apple Silicon (#594). Fixes #594. [ci skip] since we can't test Apple Silicon anyways... commit 08174a2f6ebbd8ed5aa2bc4edc45da80962f06bb Author: RuQing Xu Date: Sat Jan 1 21:35:19 2022 +0900 Evict Requirement for SVE GEMM For 8<= GCC < 10 compatibility. commit 54fa28bd847b389215cffb57a83dc9b3dce79c86 Author: Devin Matthews Date: Fri Dec 24 08:00:33 2021 -0600 Move edge cases to gemm ukr; more user-custom mods. (#583) Details: - Moved edge-case handling into the gemm microkernel. This required changing the microkernel API to take m and n dimension parameters. This required updating all existing gemm microkernel function pointer types, function signatures, and related definitions to take m and n dimensions. We also updated all existing kernels in the 'kernels' directory to take m and n dimensions, and implemented edge-case handling within those microkernels via a collection of new C preprocessor macros defined within bli_edge_case_macro_defs.h. Also removed the assembly code that formerly would handle general stride IO on the microtile, since this can now be handled by the same code that does edge cases. - Pass the obj_t.ker_fn (of matrix C) into bli_gemm_cntl_create() and bli_trsm_cntl_create(), where this function pointer is used in lieu of the default macrokernel when it is non-NULL, and ignored when it is NULL. - Re-implemented macrokernel in bli_gemm_ker_var2.c to be a single function using byte pointers rather that one function for each floating-point datatype. Also, obtain the microkernel function pointer from the .ukr field of the params struct embedded within the obj_t for matrix C (assuming params is non-NULL and contains a non-NULL value in the .ukr field). Communicate both the gemm microkernel pointer to use as well as the params struct to the microkernel via the auxinfo_t struct. - Defined gemm_ker_params_t type (for the aforementioned obj_t.params struct) in bli_gemm_var.h. - Retired the separate _md macrokernel for mixed datatype computation. We now use the reimplemented bli_gemm_ker_var2() instead. - Updated gemmt macrokernels to pass m and n dimensions into microkernel calls. - Removed edge-case handling from trmm and trsm macrokernels. - Moved most of bli_packm_alloc() code into a new helper function, bli_packm_alloc_ex(). - Fixed a typo bug in bli_gemmtrsm_u_template_noopt_mxn.c. - Added test/syrk_diagonal and test/tensor_contraction directories with associated code to test those operations. commit 961d9d509dd94f3a66f7095057e3dc8eb6d89839 Author: Kiran Date: Wed Dec 8 03:00:38 2021 +0530 Re-add BLIS_ENABLE_ZEN_BLOCK_SIZES macro for 'zen'. Details: - Added previously-deleted cpp macro block to bli_cntx_init_zen.c targeting the Naples microarchitecture that enabled different cache blocksizes when the number of threads exceeds 16. This commit represents PR #573. commit cf7d616a2fd58e293b496770654040818bf5609c Author: Devin Matthews Date: Thu Dec 2 17:10:03 2021 -0600 Enable user-customized packm ukernel/variant. (#549) Details: - Added four new fields to obj_t: .pack_fn, .pack_params, .ker_fn, and .ker_params. These fields store pointers to functions and data that will allow the user to more flexibly create custom operations while recycling BLIS's existing partitioning infrastructure. - Updated typed API to packm variant and structure-aware kernels to replace the diagonal offset with panel offsets, and changed strides of both C and P to inc/ldim semantics. Updated object API to the packm variant to include rntm_t*. - Removed the packm variant function pointer from the packm cntl_t node definition since it has been replaced by the .pack_fn pointer in the obj_t. - Updated bli_packm_int() to read the new packm variant function pointer from the obj_t and call it instead of from the cntl_t node. - Moved some of the logic of bli_l3_packm.c to a new file, bli_packm_alloc.c. - Rewrote bli_packm_blk_var1.c so that it uses byte (char*) pointers instead of typed pointers, allowing a single function to be used regardless of datatype. This obviated having a separate implementation in bli_packm_blk_var1_md.c. Also relegated handling of scalars to a new function, bli_packm_scalar(). - Employed a new standard whereby right-hand matrix operands ("B") are always packed as column-stored row panels -- that is, identically to that of left-hand matrix operands ("A"). This means that while we pack matrix A normally, we actually pack B in a transposed state. This allowed us to simplify a lot of code throughout the framework, and also affected some of the logic in bli_l3_packa() and _packb(). - Simplified bli_packm_init.c in light of the new B^T convention described above. bli_packm_init()--which is now called from within bli_packm_blk_var1()--also now calls bli_packm_alloc() and returns a bool that indicates whether packing should be performed (or skipped). - Consolidated bli_gemm_int() and bli_trsm_int() into a bli_l3_int(), which, among other things, defaults the new .pack_fn field of the obj_t to bli_packm_blk_var1() if the field is NULL. - Defined a new function, bli_obj_reset_origin(), which permanently refocuses the view of an object so that it "forgets" any offsets from its original pointer. This function also sets the object's root field to itself. Calls to bli_obj_reset_origin() for each matrix operand appear in the _front() functions, after the obj_t's are aliased. This resetting of the underlying matrices' origins is needed in preparation for more advanced features from within custom packm kernels. - Redefined bli_pba_rntm_set_pba() from a regular function to a static inline function. - Updated gemm_ukr, gemmtrsm_ukr, and trsm_ukr testsuite modules to use libblis_test_pobj_create() to create local packed objects. Previously, these packed objects were created by calling lower-level functions. commit e229e049ca08dfbd45794669df08a71dba892925 Author: Field G. Van Zee Date: Wed Dec 1 17:36:22 2021 -0600 Added recu-sed.sh script to 'build' directory. Details: - Added a recursive sed script to the 'build' directory. commit 12c66a4acc77bf4927b01e2358e2ac10b61e0a53 Author: Field G. Van Zee Date: Fri Nov 19 14:43:53 2021 -0600 Minor updates to README.md, docs/Addons.md. Details: - Add additional mentions of addons to README.md, including in the "What's New" section. - Removed mention of sandboxes from the long list of advantages provided by BLIS. - Very minor description update to opening line of Addons.md. commit a4bc03b990fe0572001eb6409efd12cd70677dcf Author: Field G. Van Zee Date: Fri Nov 19 13:29:00 2021 -0600 Brief mention/link to Addons.md in README.md. Details: - Add a blurb about the new addons feature to the "Documentation for BLIS developers" section of the README.md, which also links to the Addons.md document. commit b727645eb7a8df39dee74068f734da66322fe0b3 Merge: 9be97c15 7bde468c Author: Field G. Van Zee Date: Fri Nov 19 13:22:09 2021 -0600 Merge branch 'dev' commit 9be97c150e19fa58bca30cb993a6509ae21e2025 Author: Madan mohan Manokar <86282872+madanm3@users.noreply.github.com> Date: Thu Nov 18 00:46:46 2021 +0530 Support all four dts in test/test_her[2][k].c (#578) Details: - Replaced the hard-coded calls to double-precision real syr, syr2, syrk, and syrk in the corresponding standalone test drivers in the 'test' directory with conditional branches that will call the appropriate BLAS interface depending on which datatype is enabled. Thanks to Madan mohan Manokar for this improvement. - CREDITS file update. commit 26e4b6b29312b472c3cadf95ccdf5240764777f4 Author: Dipal M Zambare <71366780+dzambare@users.noreply.github.com> Date: Thu Nov 18 00:32:00 2021 +0530 Added support for AMD's Zen3 microarchitecture. Details: - Added a new 'zen3' subconfiguration targeting support for the AMD Zen3 microarchitecture (#561). Thanks to AMD for this contribution. - Restructured clang and AOCC support for zen, zen2, and zen3 make_defs.mk files. The clang and AOCC version detection now happens in configure, not in the subconfigurations' makefile fragments. That is, we've added logic to configure that detects the version of clang/AOCC, outputs an appropriate variable to config.mk (ie: CLANG_OT_*, AOCC_OT_*), and then checks for it within the makefile fragment (as is currently done for the GCC_OT_* variables). - Added configure support for a GCC_OT_10_1_0 variable (and associated substitution anchor) to communicate whether the gcc version is older than 10.1.0, and use this variable to check for recent enough versions of gcc to use -march=znver3 in the zen3 subconfig. - Inlined the contents of config/zen/amd_config.mk into the zen and zen2 make_defs.mk so that the files are self-contained, harmonizing the format of all three Zen-based subconfigurations' make_defs.mk files. - Added indenting (with spaces) of GNU make conditionals for easier reading in zen, zen2, and zen3 make_defs.mk files. - Adjusted the range of models checked by bli_cpuid_is_zen() (which was previously 0x00 ~ 0xff and is now 0x00 ~ 0x2f) so that it is completely disjoint from the models checked by bli_cpuid_is_zen2() (0x30 ~ 0xff). This is normally necessary because Zen and Zen2 microarchitectures share the same family (23, or 0x17), and so the model code is the only way to differentiate the two. But in our case, fixing the model range for zen *wasn't* actually necessary since we checked for zen2 first, and therefore the wide zen range acted like the 'else' of an 'if-else' statement. That said, the change helps improve clarity for the reader by encoding useful knowledge, which was obtained from https://en.wikichip.org/wiki/amd/cpuid . - Added zen2.def and zen3.def files to the collection in travis/cpuid. Note that support for zen, zen2, and zen3 is now present, and while all the three microarchitectures have identical instruction sets from the perspective of BLIS microkernels, they each correspond to different subconfigurations and therefore merit separate testing. Thanks to Devin Matthews for his guidance in hacking these files as slight modifications of zen.def. - Enabled testing of zen2 and zen3 via the SDE in travis/do_sde.sh. Now, zen, zen2, and zen3 are tested through the SDE via Travis CI builds. - Updated travis/do_sde.sh to grab the SDE tarball from a new ci-utils repository on GitHub rather than on Intel's website. This change was made in an attempt to circumvent recent troubles with Travis CI not being able to download the SDE directly from Intel's website via curl. Thanks to Devin Matthews for suggesting the idea. - Updated travis/do_sde.sh to grab the latest version (8.69.1) of the Intel SDE from the flame/ci-utils repository. - Updated .travis.yml to use gcc 9. The file was previously using gcc 8, which did not support -march=znver2. - Created amd64_legacy umbrella family in config_registry for targeting older (bulldozer, piledriver, steamroller, and excavator) microarchitectures and moved those same subconfigs out of the amd64 umbrella family. However, x86_64 retains amd64_legacy as a constituent member. - Fixed a bug in configure related to the building of the so-called config list. When processing the contents of config_registry, configure creates a series of structures and lists that allow for various mappings related to configuration families, subconfigs, and kernel sets. Two of those lists are built via substitution of umbrella families with their subconfig members, and one of those lists was improperly performing the substitution in a way that would erroneously match on partial umbrella family names. That code was changed to match the code that was already doing the substitution properly, via substitute_words(). Also added comments noting the importance of using substitute_words() in both instances. - Comment updates. commit 74c0c622216aba0c24aa2c3a923811366a160cf5 Author: Field G. Van Zee Date: Tue Nov 16 16:06:33 2021 -0600 Reverted cbc88fe. Details: - Reverted the annotation of some markdown code blocks with 'bash' after realizing that the in-browser syntax highlighting was not worthwhile. commit cbc88feb51b949ce562d044cf9f99c4e46bb8a39 Author: Field G. Van Zee Date: Tue Nov 16 16:02:39 2021 -0600 Marked some markdown shell code blocks as 'bash'. Details: - Annotated the code blocks that represent shell commands and output as 'bash' in README.md and BuildSystem.md. commit 78cd1b045155ddf0b9ec6e2ab815f2b216ad9a9e Author: Field G. Van Zee Date: Tue Nov 16 15:53:40 2021 -0600 Added 'Example Code' section to README.md. Details: - Inserted a new 'Example Code' section into the README.md immediately after the 'Getting Started' section. Thanks to Devin Matthews for recommending this addition. - Moved the 'Performance' section of the README down slightly so that it appears after the 'Documentation' section. commit 7bde468c6f7ecc4b5322d2ade1ae9c0b88e6b9f3 Author: Field G. Van Zee Date: Sat Nov 13 16:39:37 2021 -0600 Added support for addons. Details: - Implemented a new feature called addons, which are similar to sandboxes except that there is no requirement to define gemm or any other particular operation. - Updated configure to accept --enable-addon= or -a syntax for requesting an addon be included within a BLIS build. configure now outputs the list of enabled addons into config.mk. It also outputs the corresponding #include directives for the addons' headers to a new companion to the bli_config.h header file named bli_addon.h. Because addons may wish to make use of existing BLIS types within their own definitions, the addons' headers must be included sometime after that of bli_config.h (which currently is #included before bli_type_defs.h). This is why the #include directives needed to go into a new top-level header file rather than the existing bli_config.h file. - Added a markdown document, docs/Addons.md, to explain addons, how to build with them, and what assumptions their authors should keep in mind as they create them. - Added a gemmlike-like implementation of sandwich gemm called 'gemmd' as an addon in addon/gemmd. The code uses a 'bao_' prefix for local functions, including the user-level object and typed APIs. - Updated .gitignore so that git ignores bli_addon.h files. commit 7bc8ab485e89cfc6032932e57929e208a28f4be5 Author: Meghana-vankadari <74656386+Meghana-vankadari@users.noreply.github.com> Date: Fri Nov 12 04:16:14 2021 +0530 Added BLAS/CBLAS APIs for axpby, gemm_batch. (#566) Details: - Expanded the BLAS compatibility layer to include support for ?axpby_() and ?gemm_batch_(). The former is a straightforward BLAS-like interface into the axpbyv operation while the latter implements a batched gemm via loops over bli_?gemm(). Also expanded the CBLAS compatibility layer to include support for cblas_?axpby() and cblas_?gemm_batch(), which serve as wrappers to the corresponding (new) BLAS-like APIs. Thanks to Meghana Vankadari for submitting these new APIs via #566. - Fixed a long-standing bug in common.mk that for some reason never manifested until now. Previously, CBLAS source files were compiled *without* the location of cblas.h being specified via a -I flag. I'm not sure why this worked, but it may be due to the fact that the cblas.h file resided in the same directory as all of the CBLAS source, and perhaps compilers implicitly add a -I flag for the directory that corresponds to the location of the source file being compiled. This bug only showed up because some CBLAS-like source code was moved into an 'extra' subdirectory of that frame/compat/cblas/src directory. After moving the code, compilation for those files failed (because the cblas.h header file, presumably, could not be found in the same location). This bug was fixed within common.mk by explicitly adding the cblas.h directory to the list of -I flags passed to the compiler. - Added test_axpbyv.c and test_gemm_batch.c files to 'test' directory, and updated test/Makefile to build those drivers. - Fixed typo in error message string in cblas_sgemm.c. commit 28b0982ea70c21841fb23802d38f6b424f8200e1 Author: Devin Matthews Date: Wed Nov 10 12:34:50 2021 -0600 Refactored her[2]k/syr[2]k in terms of gemmt. (#531) Details: - Renamed herk macrokernels and supporting files and functions to gemmt, which is possible since at the macrokernel level they are identical. Then recast herk/her2k/syrk/syr2k in terms of gemmt within the expert level-3 oapi (bli_l3_oapi_ex.c) while also redefining them as literal functions rather than cpp macros that instantiate multiple functions. Thanks to Devin Matthews for his efforts on this issue (#531). - Check that the maximum stack buffer size is sufficiently large relative to the register blocksizes for each datatype, and do so when the context is initialized rather than when an operation is called. Note that with this change, users who pass in their own contexts into the expert interfaces currently will *not* have any checks performed. Thanks to Devin Matthews for suggesting this change. commit cfa3db3f3465dc58dbbd842f4462e4b49e7768b4 Author: Field G. Van Zee Date: Wed Nov 3 18:13:56 2021 -0500 Fixed bug in mixed-dt gemm introduced in e9da642. Details: - Fixed a bug that broke certain mixed-datatype gemm behavior. This bug was introduced recently in e9da642 when the code that performs the operation transposition (for microkernel IO preference purposes) was moved up so that it occurred sooner. However, when I moved that code, I failed to notice that there was a cpp-protected "if" conditional that applied to the entire code block that was moved. Once the code block was relocated, the orphaned if-statement was now (erroneously) glomming on to the next thing that happened to be in the function, which happened to be the call to bli_rntm_set_ways_for_op(), causing a rather odd memory exhaustion error in the sba due to the num_threads field of the rntm_t still being -1 (because the rntm_t field were never processed as they should have been). Thanks to @ArcadioN09 (Snehith) for reporting this error and helpfully including relevant memory trace output. commit f065a8070f187739ec2b34417b8ab864a7de5d7e Author: Field G. Van Zee Date: Thu Oct 28 16:05:43 2021 -0500 Removed support for 3m, 4m induced methods. Details: - Removed support for all induced methods except for 1m. This included removing code related to 3mh, 3m1, 4mh, 4m1a, and 4m1b as well as any code that existed only to support those implementations. These implementations were rarely used and posed code maintenance challenges for BLIS's maintainers going forward. - Removed reference kernels for packm that pack 3m and 4m micropanels, and removed 3m/4m-related code from bli_cntx_ref.c. - Removed support for 3m/4m from the code in frame/ind, then reorganized and streamlined the remaining code in that directory. The *ind(), *nat(), and *1m() APIs were all removed. (These additional API layers no longer made as much sense with only one induced method (1m) being supported.) The bli_ind.c file (and header) were moved to frame/base and bli_l3_ind.c (and header) and bli_l3_ind_tapi.h were moved to frame/3. - Removed 3m/4m support from the code in frame/1m/packm. - Removed 3m/4m support from trmm/trsm macrokernels and simplified some pointer arithmetic that was previously expressed in terms of the bli_ptr_inc_by_frac() static inline function (whose definition was also removed). - Removed the following subdirectories of level-0 macro headers from frame/include/level0: ri3, rih, ri, ro, rpi. The level-0 scalar macros defined in these directories were used exclusively for 3m and 4m method codes. - Simplified bli_cntx_set_blkszs() and bli_cntx_set_ind_blkszs() in light of 1m being the only induced method left within BLIS. - Removed dt_on_output field within auxinfo_t and its associated accessor functions. - Re-indexed the 1e/1r pack schemas after removing those associated with variants of the 3m and 4m methods. This leaves two bits unused within the pack format portion of the schema bitfield. (See bli_type_defs.h for more info.) - Spun off the basic and expert interfaces to the object and typed APIs into separate files: bli_l3_oapi.c and bli_l3_oapi_ex.c; bli_l3_tapi.c and bli_l3_tapi_ex.c. - Moved the level-3 operation-specific _check function calls from the operations' _front() functions to the corresponding _ex() function of the object API. (This change roughly maintains where the _check() functions are called in the call stack but lays the groundwork for future changes that may come to the level-3 object APIs.) Minor modifications to bli_l3_check.c to allow the check() functions to be called from the expert interface APIs. - Removed support within the testsuite for testing the aforementioned induced methods, and updated the standalone test drivers in the 'test' directory so reflect the retirement of those induced methods. - Modified the sandbox contract so that the user is obliged to define bli_gemm_ex() instead of bli_gemmnat(). (This change was made in light of the *nat() functions no longer existing.) Also updated the existing 'power10' and 'gemmlike' sandboxes to come into compliance with the new sandbox rules. - Updated BLISObjectAPI.md, BLISTypedAPI.md, Testsuite.md documentation to reflect the retirement of 3m/4m, and also modified Sandboxes.md to bring the document into alignment with new conventions. - Updated various comments; removed segments of commented-out code. commit e8caf200a908859fa5f5ea2049911a9bdaa3d270 Author: Field G. Van Zee Date: Mon Oct 18 13:04:15 2021 -0500 Updated do_sde.sh to get SDE from GitHub. Details: - Updated travis/do_sde.sh so that the script downloads the SDE tarball from a new ci-utils repository on GitHub rather than from Intel's website. This change is being made in an attempt to circumvent Travis CI's recent troubles with downloading the SDE from Intel's website via curl. Thanks to Devin Matthews for suggesting the idea. commit 290ff4b1c26737b074d5abbf76966bc22af8c562 Author: Field G. Van Zee Date: Thu Oct 14 16:09:43 2021 -0500 Disable SDE testing of old AMD microarchitectures. Details: - Skip testing on piledriver, steamroller, and excavator platforms in travis/do_sde.sh. commit 514fd101742dee557e5eb43d0023a221ae8a7172 Author: Field G. Van Zee Date: Thu Oct 14 13:50:28 2021 -0500 Fixed substitution bug in configure. Details: - Fixed a bug in configure related to the building of the so-called config list. When processing the contents of config_registry, configure creates a series of structures and list that allow for various mappings related to configuration families, subconfigs, and kernel sets. Two of those lists are built via subsitituion of umbrella families with their subconfig members, and one of those lists was improperly performing the subtitution in a way that would erroneously match on partial umbrella family names. That code was changed to match the code that was already doing the subtitution properly, via substitute_words(). - Added comments noting the importance of using substitute_words() in both instances. commit e9da6425e27a9d63c9fef92afc2dd750c601ccd7 Author: Field G. Van Zee Date: Wed Oct 13 14:15:38 2021 -0500 Allow use of 1m with mixing of row/col-pref ukrs. Details: - Fixed a bug that broke the use of 1m for dcomplex when the single- precision real and double-precision real ukernels had opposing I/O preferences (row-preferential sgemm ukernel + column-preferential dgemm ukernel, or vice versa). The fix involved adjusting the API to bli_cntx_set_ind_blkszs() so that the induced method context init function (e.g., bli_cntx_init__ind()) could call that function for only one datatype at a time. This allowed the blocksize scaling (which varies depending on whether we're doing 1m_r or 1m_c) to happen on a per-datatype basis. This fixes issue #557. Thanks to Devin Matthews and RuQing Xu for helping discover and report this bug. - The aforementioned 1m fix required moving the 1m_r/1m_c logic from bli_cntx_ref.c into a new function, bli_l3_set_schemas(), which is called from each level-3 _front() function. The pack_t schemas in the cntx_t were also removed entirely, along with the associated accessor functions. This in turn required updating the trsm1m-related virtual ukernels to read the pack schema for B from the auxinfo_t struct rather than the context. This also required slight tweaks to bli_gemm_md.c. - Repositioned the logic for transposing the operation to accommodate the microkernel IO preference. This mostly only affects gemm. Thanks to Devin Matthews for his help with this. - Updated dpackm pack ukernels in the 'armsve' kernel set to avoid querying pack_t schemas from the context. - Removed the num_t dt argument from the ind_cntx_init_ft type defined in bli_gks.c. The context initialization functions for induced methods were previously passed a dt argument, but I can no longer figure out *why* they were passed this value. To reduce confusion, I've removed the dt argument (including also from the function defintion + prototype). - Commented out setting of cntx_t schemas in bli_cntx_ind_stage.c. This breaks high-leve implementations of 3m and 4m, but this is okay since those implementations will be removed very soon. - Removed some older blocks of preprocessor-disabled code. - Comment update to test_libblis.c. commit 81e103463214d589071ccbe2d90b8d7c19a186e4 Author: Minh Quan Ho <1337056+hominhquan@users.noreply.github.com> Date: Wed Oct 13 20:28:02 2021 +0200 Alloc at least 1 elem in pool_t block_ptrs. (#560) Details: - Previously, the block_ptrs field of the pool_t was allowed to be initialized as any unsigned integer, including 0. However, a length of 0 could be problematic given that malloc(0) is undefined and therefore variable across implementations. As a safety measure, we check for block_ptrs array lengths of 0 and, in that case, increase them to 1. - Co-authored-by: Minh Quan Ho commit 327481a4b0acf485d0cbdd8635dd9b886ba3f2a7 Author: Minh Quan Ho <1337056+hominhquan@users.noreply.github.com> Date: Tue Oct 12 19:53:04 2021 +0200 Fix insufficient pool-growing logic in bli_pool.c. (#559) Details: - The current mechanism for growing a pool_t doubles the length of the block_ptrs array every time the array length needs to be increased due to new blocks being added. However, that logic did not take in account the new total number of blocks, and the fact that the caller may be requesting more blocks that would fit even after doubling the current length of block_ptrs. The code comments now contain two illustrating examples that show why, even after doubling, we must always have at least enough room to fit all of the old blocks plus the newly requested blocks. - This commit also happens to fix a memory corruption issue that stems from growing any pool_t that is initialized with a block_ptrs length of 0. (Previously, the memory pool for packed buffers of C was initialized with a block_ptrs length of 0, but because it is unused this bug did not manifest by default.) - Co-authored-by: Minh Quan Ho commit 32a6d93ef6e2af5e486dfd5e46f8272153d3d53d Merge: 408906fd 2604f407 Author: Devin Matthews Date: Sat Oct 9 15:53:54 2021 -0500 Merge pull request #543 from xrq-phys/armsve-packm-fix ARMSVE Block SVE-Intrinsic Kernels for GCC 8-9 commit 408906fdd8892032aa11bd061b7971128f453bef Merge: 4277fec0 ccf16289 Author: Devin Matthews Date: Sat Oct 9 15:50:25 2021 -0500 Merge pull request #542 from xrq-phys/armsve-zgemm Arm SVE CGEMM / ZGEMM Natural Kernels commit ccf16289d2e71fd9511ccf2d13dcebbfa29deabc Author: RuQing Xu Date: Fri Oct 8 12:34:14 2021 +0900 Arm SVE C/ZGEMM Fix FMOV 0 Mistake FMOV [hsd]M, #imm does not allow zero immediate. Use wzr, xzr instead. commit 82b61283b2005f900101056e6df2a108258db602 Author: RuQing Xu Date: Fri Oct 8 12:17:29 2021 +0900 SH Kernel Unused Eigher commit 1749dfa493054abd2e4ddba7cb21278d337e4f74 Author: RuQing Xu Date: Fri Oct 8 12:11:53 2021 +0900 Arm SVE C/ZGEMM Support *beta==0 commit 4b648e47daad256ab8ab698173a97f71ab9f75eb Author: RuQing Xu Date: Wed Sep 22 16:42:09 2021 +0900 Arm SVE Config armsve Use ZGEMM/CGEMM commit f76ea905e216cf640975e6319c6d2f54aeafed2e Author: RuQing Xu Date: Tue Sep 21 20:38:44 2021 +0900 Arm SVE: Update Perf. Graph Pic. size seems a bit different from upstream. Generaged w/ MATLAB. Open to any change. commit 66a018e6ad00d9e8967b67e1aa3e23b20a7efdfe Author: RuQing Xu Date: Mon Sep 20 00:16:11 2021 +0900 Arm SVE CGEMM 2Vx10 Unindex Process Alpha=1.0 commit 9e1e781cb59f8fadb2a10a02376d3feac17ce38d Author: RuQing Xu Date: Sun Sep 19 23:30:42 2021 +0900 Arm SVE ZGEMM 2Vx10 Unindex Process Alpha=1.0 commit f7c6c2b119423e7ba7a24ae2156790e076071cba Author: RuQing Xu Date: Thu Sep 16 01:47:42 2021 +0900 A64FX Config Use ZGEMM/CGEMM commit e4cabb977d038688688aca39b366f98f9c36b7eb Author: RuQing Xu Date: Thu Sep 16 01:34:26 2021 +0900 Arm SVE Typo Fix ZGEMM/CGEMM C Prefetch Reg commit b677e0d61b23f26d9536e5c363fd6bbab6ee1540 Author: RuQing Xu Date: Thu Sep 16 01:18:54 2021 +0900 Arm SVE Add SGEMM 2Vx10 Unindexed commit 3f68e8309f2c5b31e25c0964395a180a80014d36 Author: RuQing Xu Date: Thu Sep 16 01:00:54 2021 +0900 Arm SVE ZGEMM Support Gather Load / Scatt. St. commit c19db2ff826e2ea6ac54569e8aa37e91bdf7cabe Author: RuQing Xu Date: Wed Sep 15 23:39:53 2021 +0900 Arm SVE Add ZGEMM 2Vx10 Unindexed commit e13abde30b9e0e381c730c496e74bc7ae062a674 Author: RuQing Xu Date: Wed Sep 15 04:19:45 2021 +0900 Arm SVE Add ZGEMM 2Vx7 Unindexed commit 49b9d7998eb86f340ae7b26af3e5a135d6a8feee Author: RuQing Xu Date: Tue Sep 14 04:02:47 2021 +0900 Arm SVE Add ZGEMM 2Vx8 Unindexed commit 4277fec0d0293400497ae8bcfc32be5e62319ae9 Merge: 2329d990 f44149f7 Author: Devin Matthews Date: Thu Oct 7 13:47:22 2021 -0500 Merge pull request #533 from xrq-phys/arm64-hi-bw ARMv8 PACKM and GEMMSUP Kernels + Apple Firestorm Subconfig commit 2329d99016fe1aeb86da4552295f497543cea311 Author: Devin Matthews Date: Thu Oct 7 12:37:58 2021 -0500 Update Travis CI badge [ci skip] commit f44149f787ae3d4b53d9c4d8e6f23b2818b7770d Author: RuQing Xu Date: Fri Oct 8 02:35:58 2021 +0900 Armv8 Trash New Bulk Kernels - They didn't make much improvements. - Can't register row-preferral and column-preferral ukrs at the same time. Will break 1m. commit 70b52cadc5ef4c16431e1876b407019e6286614e Author: Devin Matthews Date: Thu Oct 7 12:34:35 2021 -0500 Enable testing 1m in `make check`. commit 2604f4071300d109f28c8438be845aeaf3ec44e4 Author: RuQing Xu Date: Thu Oct 7 02:39:00 2021 +0900 Config ArmSVE Unregister 12xk. Move 12xk to Old commit 1e3200326be9109eb0f8c7b9e4f952e45700cbba Author: RuQing Xu Date: Thu Oct 7 02:37:14 2021 +0900 Revert __has_include(). Distinguish w/ BLIS_FAMILY_** commit a4066f278a5c06f73b16ded25f115ca4b7728ecb Author: RuQing Xu Date: Thu Oct 7 02:26:05 2021 +0900 Register firestorm into arm64 Metaconfig commit d7a3372247c37568d142110a1537632b34b8f2ff Author: RuQing Xu Date: Thu Oct 7 02:25:14 2021 +0900 Armv8 DGEMMSUP Fix Edge 6x4 Switch Case Typo commit 2920dde5ac52e09f84aa42990aab8340421522ce Author: RuQing Xu Date: Thu Oct 7 02:01:45 2021 +0900 Armv8 DGEMMSUP Fix 8x4m Store Inst. Typo commit 14b13583f1802c002e195b3b48874b3ebadbeb20 Author: Devin Matthews Date: Wed Oct 6 10:22:34 2021 -0500 Add test for Apple M1 (firestorm) This test will run on Linux, but all the kernels should run just fine. This does not test autodetection but then none of the other ARM tests do either. commit a024715065532400da6257b8b3124ca5aecda405 Author: RuQing Xu Date: Thu Oct 7 00:15:54 2021 +0900 Firestorm CPUID Dispatcher Commenting out due to possibly a Xcode bug. commit b9da6d55fec447d05c8b67f34ce83617123d8357 Author: RuQing Xu Date: Wed Oct 6 12:25:54 2021 +0900 Armv8 GEMMSUP Edge Cases Require Signed Ints Fix a bug in bli_gemmsup_rd_armv8a_asm_d6x8m.c. For safety upon similar strategies in the future, change all [mn]_[iter/left] into signed ints. commit 34919de3df5dda7a06fc09dcec12ca46dc8b26f4 Author: Devin Matthews Date: Sat Oct 2 18:48:50 2021 -0500 Make error checking level a thread-local variable. Previously, this was a global variable. Setting the value was synchronized via a mutex but reading the value was not. Of course, these accesses are almost certainly atomic, but there is still the possibility of one thread attempting to set the value and then reading the value set by another thread. For correct operation under user threading (e.g. pthreads), this should probably be thread-local with no mutex. commit c3024993c3d50236fad112822215f066496c5831 Author: Devin Matthews Date: Tue Oct 5 15:20:27 2021 -0500 Fix data race in testsuite. commit 353a0d82572f26e78102cee25693130ce6e0ea5b Author: Devin Matthews Date: Tue Oct 5 14:24:17 2021 -0500 Update .appveyor.yml [ci skip] commit 4bfadf9b561d4ebe0bbaf8b6d332f07ff531d618 Author: RuQing Xu Date: Wed Oct 6 01:51:26 2021 +0900 Firestorm Block Size Fixes commit 40baf83f0ea2749199b93b5a8ac45c01794b008c Author: RuQing Xu Date: Wed Oct 6 01:00:52 2021 +0900 Armv8 Handle *beta == 0 for GEMMSUP ??r Case. commit 079fbd42ce8cf7ea67a939b0f80f488de5821319 Merge: f5c03e9f 9905f443 Author: Devin Matthews Date: Mon Oct 4 17:21:48 2021 -0500 Merge branch 'master' into arm64-hi-bw commit 9905f44347eea4c57ef4927b81f1c63e76a92739 Merge: 6d3036e3 64a421f6 Author: Devin Matthews Date: Mon Oct 4 15:58:59 2021 -0500 Merge pull request #553 from flame/rpath-fix Add an option to use an @rpath-dependent install_name on macOS commit 6d3036e31d8a2c1acbc1260489eeb8f535a8f97a Merge: 53377fcc eaa554aa Author: Devin Matthews Date: Mon Oct 4 15:58:43 2021 -0500 Merge pull request #545 from hominhquan/clean_error bli_error: more cleanup on the error strings array commit 53377fcca91e595787b38e2a47780ac0c35a7e7c Merge: d0a0b4b8 80c5366e Author: Devin Matthews Date: Mon Oct 4 15:45:53 2021 -0500 Merge pull request #554 from flame/armsve-cleanup Move unused ARM SVE kernels to "old" directory. commit 80c5366e4a9b8b72d97fba1eab89bab8989c44f4 Author: Devin Matthews Date: Mon Oct 4 15:40:28 2021 -0500 Move unused ARM SVE kernels to "old" directory. commit 64a421f6983ab5bc0b55df30a2ddcfff5bfd73be Author: Devin Matthews Date: Mon Oct 4 13:40:43 2021 -0500 Add an option to control whether or not to use @rpath. Adds `--enable-rpath/--disable--rpath` (default disabled) to use an install_name starting with @rpath/. Otherwise, set the install_name to the absolute path of the install library, which was the previous behavior. commit c4a31683dd6f4da3065d86c11dd998da5192740a Author: Devin Matthews Date: Mon Oct 4 13:27:10 2021 -0500 Fix $ORIGIN usage on linux. commit d0a0b4b841fce56b7b2d3c03c5d93ad173ce2b97 Author: Dave Love Date: Mon Oct 4 18:03:04 2021 +0000 Arm micro-architecture dispatch (#344) Details: - Reworked support for ARM hardware detection in bli_cpuid.c to parse the result of a CPUID-like instruction. - Added a64fx support to bli_gks.c. - #include arm64 and arm32 family headers from bli_arch_config.h. - Fix the ordering of the "armsve" and "a64fx" strings in the config_name string array in bli_arch.c. The ordering did not match the ordering of the corresponding arch_t values in bli_type_defs.h, as it should have all along. - Added clang support to make_defs.mk in arm64, cortexa53, cortexa57 subconfigs. - Updated arm64 and arm32 families in config_registry. - Updated docs/HardwareSupport.md to reflect added ARM support. - Thanks to Dave Love, RuQing Xu, and Devin Matthews for their contributions in this PR (#344). commit 91408d161a2b80871463ffb6f34c455bdfb72492 Author: Devin Matthews Date: Mon Oct 4 11:37:48 2021 -0500 Use @path-based install name on MacOS and use relocatable RPATH entries for testsuite inaries. - RPATH entries (and DYLD_LIBRARY_PATH) do nothing on macOS unless the install_name of the library starts with @rpath/. While the install_name can be set to the absolute install path, this makes the installation non-relocatable. When using @path in the install_name, install paths within the normal DYLD_LIBRARY_PATH work with no changes on the user side, but for install paths off the beaten track, users must specify an RPATH entry when linking (or modify DYLD_LIBRARY_PATH at runtime). Perhaps this could be made into a configure-time option. - Having relocable testsuite binaries is not necessarily a priority but it is easy to do with @executable_path (macOS) or $ORIGIN (linux/BSD). commit f5c03e9fe808f9bd8a3e0c62786334e13c46b0fc Author: RuQing Xu Date: Sun Oct 3 16:51:51 2021 +0900 Armv8 Handle *beta == 0 for GEMMSUP ?rc Case. commit abc648352c591e26ceee436bd3a45400115b70c5 Author: RuQing Xu Date: Sun Oct 3 13:14:19 2021 +0900 Armv8 Fix 6x8 Row-Maj Ukr - Fixed for 6x8 only, 4x4 & 4x8 pending; - Installed to config firestorm as benchmark seems to show better perf: Old: blis_dgemm_ukr_c 6 8 320 36.87 2.43e-17 PASS blis_dgemm_ukr_c 6 8 352 40.55 1.04e-17 PASS blis_dgemm_ukr_c 6 8 384 44.24 5.68e-17 PASS blis_dgemm_ukr_c 6 8 416 41.67 3.51e-17 PASS blis_dgemm_ukr_c 6 8 448 34.41 2.94e-17 PASS blis_dgemm_ukr_c 6 8 480 42.53 2.35e-17 PASS New: blis_dgemm_ukr_r 6 8 352 50.69 1.59e-17 PASS blis_dgemm_ukr_r 6 8 384 49.15 5.55e-17 PASS blis_dgemm_ukr_r 6 8 416 50.44 2.86e-17 PASS blis_dgemm_ukr_r 6 8 448 46.92 3.12e-17 PASS blis_dgemm_ukr_r 6 8 480 48.08 4.08e-17 PASS commit 0a45bc0fbc7aee3876c315ed567fc37f19cdc57f Merge: 5013a6cb 13dbd5b5 Author: Devin Matthews Date: Sat Oct 2 18:59:43 2021 -0500 Merge pull request #552 from flame/armsve_beta_0 Add explicit handling for beta == 0 in armsve sd and armv7a d gemm ukrs. commit 13dbd5b5d3dbf27e33ecf0e98d43c97019a6339d Author: Devin Matthews Date: Sat Oct 2 20:40:25 2021 +0000 Apply patch from @xrq-phys. commit ae0eeeaf77c77892db17027cef10b95ec97c904f Author: Devin Matthews Date: Wed Sep 29 16:42:33 2021 -0500 Add explicit handling for beta == 0 in armsve sd and armv7a d gemm ukrs. commit 5013a6cb7110746c417da96e4a1308ef681b0b88 Author: Field G. Van Zee Date: Wed Sep 29 10:38:50 2021 -0500 More edits and fixes to docs/FAQ.md. commit b36fb0fbc5fda13d9a52cc64953341d3d53067ee Author: Field G. Van Zee Date: Tue Sep 28 18:47:45 2021 -0500 Fixed newly broken link to CREDITS in FAQ.md. commit 3442d4002b3bfffd8848f72103b30691df2b19b1 Author: Field G. Van Zee Date: Tue Sep 28 18:43:23 2021 -0500 More minor fixes to FAQ.md and Sandboxes.md. commit 89aaf00650d6cc19b83af2aea6c8d04ddd3769cb Author: Field G. Van Zee Date: Tue Sep 28 18:34:33 2021 -0500 Updates to FAQ.md, Sandboxes.md, and README.md. Details: - Updated FAQ.md to include two new questions, reordered an existing question, and also removed an outdated and redundant question about BLIS vs. AMD BLIS. - Updated Sandboxes.md to use 'gemmlike' as its main example, along with other smaller details. - Added ARM as a funder to README.md. commit c52c43115ec2264fda9380c48d9e6bb1e1ea2ead Merge: 1fc23d21 1f527a93 Author: Field G. Van Zee Date: Sun Sep 26 15:56:54 2021 -0500 Merge branch 'dev' commit 1fc23d2141189c7b583a5bff2cffd87fd5261444 Author: Field G. Van Zee Date: Tue Sep 21 14:54:20 2021 -0500 Safelist 'master', 'dev', 'amd' branches. Details: - Modified .travis.yml so that only commits to 'master', 'dev', and 'amd' branches get built by Travis CI. Thanks to Devin Matthews for helping to track down the syntax for this change. commit 1f527a93b996093e06ef7a8e94fb47ee7e690ce0 Author: Field G. Van Zee Date: Mon Sep 20 17:56:36 2021 -0500 Re-enable and fix fb93d24. Details: - Re-enabled the changes made in fb93d24. - Defined BLIS_ENABLE_SYSTEM in bli_arch.c, bli_cpuid.c, and bli_env.c, all of which needed the definition (in addition to config_detect.c) in order for the configure-time hardware detection binary to be compiled properly. Thanks to Minh Quan Ho for helping identify these additional files as needing to be updated. - Added additional comments to all four source files, most notably to prompt the reader to remember to update all of the files when updating any of the files. Also made the cpp code in each of the files as consistent/similar as possible. - Refer to issues #532 and PR #546 for more history. commit 7b39c1492067de941f81b49a3b6c1583290336fd Author: Field G. Van Zee Date: Mon Sep 20 16:13:50 2021 -0500 Reverted fb93d24. Details: - The latest changes in fb93d24 are still causing problems. Reverting and preparing to move them to a branch. commit fb93d242a4fef4694ce2680436da23087bbdd5fe Author: Field G. Van Zee Date: Mon Sep 20 15:42:08 2021 -0500 Re-enable and fix 8e0c425 (BLIS_ENABLE_SYSTEM). Details: - Re-enable the changes originally made in 8e0c425 but quickly reverted in 2be78fc. - Moved the #include of bli_config.h so that it occurs before the #include of bli_system.h. This allows the #define BLIS_ENABLE_SYSTEM or #define BLIS_DISABLE_SYSTEM in bli_config.h to be processed by the time it is needed in bli_system.h. This change should have been in the original 8e0c425, but was accidentally omitted. Thanks to Minh Quan Ho for catching this. - Add #define BLIS_ENABLE_SYSTEM to config_detect.c so that the proper cpp conditional branch executes in bli_system.h when compiling the hardware detection binary. The changes made in 8e0c425 were an attempt to support the definition of BLIS_OS_NONE when configuring with --disable-system (in issue #532). That commit failed because, aside from the required but omitted header reordering (second bullet above), AppVeyor was unable to compile the hardware detection binary as a result of missing Windows headers. This commit, which builds on PR #546, should help fix that issue. Thanks to Minh Quan Ho for his assistance and patience on this matter. commit eaa554aa52b879d181fdc87ba0bfad3ab6131517 Author: Minh Quan HO Date: Wed Sep 15 15:39:36 2021 +0200 bli_error: more cleanup on the error strings array - There was redundance between the macro BLIS_MAX_NUM_ERR_MSGS (=200) and the enum BLIS_ERROR_CODE_MAX (-170), while they both mean the same thing: the maximal number of error codes/messages. - The previous initialization of error messages at compile time ignored that the 'bli_error_string' array still occupies useless memory due to 2D char[][] declaration. Instead, it should be just an array of pointers, pointing at strings in .rodata section. - This commit does the two modifications: * retired macros BLIS_MAX_NUM_ERR_MSGS and BLIS_MAX_ERR_MSG_LENGTH everywhere * switch bli_error_string from char[][] to char *[] to reduce its footprint from 40KB (200*200) to 1.3KB (170*sizeof(char*)). (No problem to use the enum BLIS_ERROR_CODE_MAX at compile-time, since compiler is smart enough to determine its value is 170.) commit 52f29f739dbbb878c4cde36dbe26b82847acd4e9 Author: Field G. Van Zee Date: Fri Sep 17 08:38:29 2021 -0500 Removed last vestige of #define BLIS_NUM_ARCHS. Details: - Removed the commented-out #define BLIS_NUM_ARCHS in bli_type_defs.h and its associated (now outdated) comments. BLIS_NUM_ARCHS has been part of the arch_t enum for some time now, and so this change is mostly about removing any opportunity for confusion for people who may be reading the code. Thanks to Minh Quan Ho for leading me to cleanup. commit 849aae09f4fbf8d7abf11f4df1471f1d057e874b Author: Field G. Van Zee Date: Thu Sep 16 14:47:45 2021 -0500 Added new packm var3 to 'gemmlike'. Details: - Defined a new packm variant for the 'gemmlike' sandbox. This new variant (bls_l3_packm_var3.c) parallelizes the packing operation over the k dimension rather than the m or n dimensions. Note that the gemmlike implementation still uses var1 by default, and use of the new code would require changing bls_l3_packm_a.c and/or bls_l3_packm_b.c so that var3 is called instead. Thanks to Jeff Diamond for proposing this (perhaps NUMA-friendly) solution. commit b6f71fd378b7cd0cdc5c780e0b8c975a7abde998 Merge: 9293a68e e3dc1954 Author: Devin Matthews Date: Thu Sep 16 12:24:33 2021 -0500 Merge pull request #544 from flame/haswell-gemmsup-fpe Fix more copy-paste errors in the haswell gemmsup code. commit e3dc1954ffb5eee2a8b41fce85ba589f75770eea Author: Devin Matthews Date: Thu Sep 16 10:59:37 2021 -0500 Fix problem where uninitialized registers are included in vhaddpd in the Mx1 gemmsup kernels for haswell. The fix is to use the same (valid) source register twice in the horizontal addition. commit 5191c43faccf45975f577c60b9089abee25722c9 Author: Devin Matthews Date: Thu Sep 16 10:16:17 2021 -0500 Fix more copy-paste errors in the haswell gemmsup code. Fixes #486. commit 30c29b256ef13f0141ca9e9169cbdc7a45ce3a61 Author: RuQing Xu Date: Thu Sep 16 05:01:03 2021 +0900 Arm SVE Exclude SVE-Intrinsic Kernels for GCC 8-9 Affected configs: a64fx. commit bffa85be59dece8e756b9444e762f18892c06ee1 Author: RuQing Xu Date: Thu Sep 16 04:31:45 2021 +0900 Arm SVE: Correct PACKM Ker Name: Intrinsic Kers SVE-Intrinsic-based kernels ought not to use asm in their names. commit 9293a68eb6557a9ea43a846435908c3d52d4218b Merge: ade10f42 98ce6e8b Author: Devin Matthews Date: Fri Sep 10 14:13:29 2021 -0500 Merge pull request #534 from flame/cxx_test Add test to Travis using C++ compiler to make sure blis.h is C++-compatible commit 98ce6e8bc916e952510872caa60d818d62a31e69 Author: Devin Matthews Date: Fri Sep 10 14:12:13 2021 -0500 Do a fast test on OSX. [ci skip] commit c76fcad0c2836e7140b6bef3942e0a632a5f2cda Author: Devin Matthews Date: Fri Sep 10 13:57:02 2021 -0500 Fix AArch64 tests and consolidate some other tests. commit e486d666ffefee790d5e39895222b575886ac1ea Author: Devin Matthews Date: Fri Sep 10 13:50:16 2021 -0500 Use C++ cross-compiler for ARM tests. commit fbb3560cb8e2aeab205c47c2b096d4fa306d93db Author: Devin Matthews Date: Fri Sep 10 13:38:27 2021 -0500 Attempt to fix cxx-test for OOT builds. commit 9c0064f3f67d59263c62d57ae19605562bb87cc2 Author: Devin Matthews Date: Fri Sep 10 10:39:04 2021 -0500 Fix config_name in bli_arch.c commit ade10f427835d5274411cafc9618ac12966eb1e7 Author: Field G. Van Zee Date: Fri Aug 27 12:47:12 2021 -0500 Updated travis-ci.org link in README.md to .com. commit 2be78fc97777148c83d20b8509e38aa1fc1b4540 Author: Field G. Van Zee Date: Fri Aug 27 12:17:26 2021 -0500 Disabled (at least temporarily) commit 8e0c425. Details: - Reverted changes in 8e0c425 due to AppVeyor build failures that we do not yet understand. commit 820f11a4694aee5f234e24277aecca40885ae9d4 Author: RuQing Xu Date: Fri Aug 27 13:40:26 2021 +0900 Arm Whole GEMMSUP Call Route is Asm/Int Optimized - `ref2` call in `bli_gemmsup_rv_armv8a_asm_d6x8m.c` is commented out. - `bli_gemmsup_rv_armv8a_asm_d4x8m.c` contains a tail `ref2` call but it's not called by any upper routine. commit 8e0c4255de52a0a5cffecbebf6314aa52120ebe4 Author: Field G. Van Zee Date: Thu Aug 26 15:29:18 2021 -0500 Define BLIS_OS_NONE when using --disable-system. Details: - Modified bli_system.h so that the cpp macro BLIS_OS_NONE is defined when BLIS_DISABLE_SYSTEM is defined. Otherwise, the previous OS- detecting macro conditionals are considered. This change is to accommodate a solution to a cross-compilation issue described in #532. commit d6eb70fbc382ad7732dedb4afa01cf9f53e3e027 Author: Field G. Van Zee Date: Thu Aug 26 13:12:39 2021 -0500 Updated stale calls to malloc_intl() in gemmlike. Details: - Updated two out-of-date calls to bli_malloc_intl() within the gemmlike sandbox. These calls to malloc_intl(), which resided in bls_l3_decor_pthreads.c, were missing the err_t argument that the function uses to report errors. Thanks to Jeff Diamond for helping isolate this issue. commit 2f7325b2b770a15ff8aaaecc087b22238f0c67b7 Author: Field G. Van Zee Date: Mon Aug 23 15:04:05 2021 -0500 Blacklist clang10/gcc9 and older for 'armsve'. Details: - Prohibit use of clang 10.x and older or gcc 9.x and older for the 'armsve' subconfiguration. Addresses issue #535. commit 7e2951e61fda1c325d6a76ca9956253482d84924 Author: RuQing Xu Date: Mon Aug 23 17:06:44 2021 +0900 Arm: DGEMMSUP `Macro' Edge Cases Stop Calling Ref Ref cannot handle panel strides (packed cases) thus cannot be called from the beginning of `gemmsup` (i.e. cannot be dispatch target of gemmsup to other sizes.) commit 4fd82b0e9348553d83e258bd4969e49a81f8fcf0 Author: RuQing Xu Date: Mon Aug 23 05:18:32 2021 +0900 Header Typo commit 35409ebe67557c0e7cf5ced138c8166c9c1c909f Author: RuQing Xu Date: Mon Aug 23 04:51:47 2021 +0900 Arm: DGEMMSUP ??r(rv) Invoke Edge Size Plus some fix at edges. TODO: Should ensure that no ref kernel appear in beginning of gemmsup kernels. As ref does not recognise panel stride. commit a361492c24fdd919ee037763fc6523e8d7d2967a Author: RuQing Xu Date: Mon Aug 23 01:13:39 2021 +0900 Arm: DGEMMSUP ?rc(rd) Invoke Edge Size commit eaea67401c2ab31f2e51eede59725f64c1a21785 Merge: 5fc65cdd e320ec6d Author: Devin Matthews Date: Sat Aug 21 16:09:31 2021 -0500 Merge branch 'master' into cxx_test commit 5fc65cdd9e4134c5dcb16d21cd4a79ff426ca9f3 Author: Devin Matthews Date: Sat Aug 21 15:59:27 2021 -0500 Add test to Travis using C++ compiler to make sure blis.h is C++-compatible. commit e320ec6d5cd44e03cb2e2faa1d7625e84f76d668 Author: Field G. Van Zee Date: Fri Aug 20 17:15:20 2021 -0500 Moved lang defs from _macro_def.h to _lang_defs.h. Details: - Moved miscellaneous language-related definitions, including defs related to the handling of the 'restrict' keyword, from the top half of bli_macro_defs.h into a new file, bli_lang_defs.h, which is now #included immediately after "bli_system.h" in blis.h. This change is an attempt to fix a report of recent breakage of C++ compilers due to the recent introduction of 'restrict' in bli_type_defs.h (which previously was being included *before* bli_macro_defs.h and its restrict handling therein. Thanks to Ivan Korostelev for reporting this issue in #527. - CREDITS file update. commit e6799b26a6ecf1e80661a77d857d1c9e9adf50dc Author: RuQing Xu Date: Sat Aug 21 02:39:38 2021 +0900 Arm: Implement GEMMSUP Fallback Method bli_dgemmsup_rv_armv8a_int_6x4mn commit 7d5903d8d7570090eb37c592094424d1c64805d1 Author: RuQing Xu Date: Sat Aug 21 01:55:50 2021 +0900 Arm64 Fix: Support Alpha/Beta in GEMMSUP Intrin Forgot to support `alpha`/`beta` in gemmsup_armv8a_int. commit 3b275f810b2479eb5d6cf2296e97a658cf1bb769 Author: Field G. Van Zee Date: Thu Aug 19 16:06:46 2021 -0500 Minor tweaks to gemmlike sandbox. Details: - In the gemmlike sandbox, changed the loop index variable of inner loop of packm_cxk() from 'd' to 'i' (and likewise for the corresponding inlined code within packm_var2()). - Pack matrices A and B using packm_var1() instead of packm_var2(). commit 3eccfd456e7e84052c9a429dcde1183a7ecfaa48 Author: Field G. Van Zee Date: Thu Aug 19 13:22:10 2021 -0500 Added local _check() code to gemmlike sandbox. Details: - Added code to the gemmlike sandbox that handles parameter checking. Previously, the gemmlike implementation called bli_gemm_check(), which resides within the BLIS framework proper. Certain modifications that a user may wish to perform on the sandbox, such as adding a new matrix or vector operand, would have required additional checks, and so these changes make it easier for such a person to implement those checks for their custom gemm-like operation. commit 7144230cdb0653b70035ddd91f7f41e06ad8d011 Author: Field G. Van Zee Date: Wed Aug 18 13:25:39 2021 -0500 README.md citation updates (e.g. BLIS7 bibtex). commit 4a955e939044cfd2048cf9f3e33024e3ad1fbe00 Author: Field G. Van Zee Date: Mon Aug 16 13:49:27 2021 -0500 Tweaks to gemmlike to facilitate 3rd party mods. Details: - Changed the implementation in the 'gemmlike' sandbox to more easily allow others to provide custom implementations of packm. These changes include: - Calling a local version of packm_cxk() that can be modified. This version of packm_cxk() uses inlined loops in packm_cxk() rather than querying the context for packm kernels (or even using scal2m). - Providing two variants of packm, one of which calls the aforementioned packm_cxk(), the other of which inlines the contents of packm_cxk() into the variant itself, making it self-contained. To switch from one to the other, simply change which function gets called within bls_packm_a() and bls_packm_b(). - Simplified and cleaned up some variant names in both variants of packm, relative to their parent code. commit 2c0b4150e40c83ea814f69ca766da74c19ed0a58 Merge: c99fae50 4b8ed99d Author: Devin Matthews Date: Sat Aug 14 18:41:35 2021 -0500 Merge pull request #527 from flame/obj_t_makeover Implement proposed new function pointer fields for obj_t. commit 4b8ed99d926876fbf54c15468feae4637268eb6b Author: Field G. Van Zee Date: Fri Aug 13 15:31:10 2021 -0500 Whitespace tweaks. commit c99fae50ac3de0b5380a085aeebebfe67a645407 Merge: e6d68bc4 4f70eb79 Author: Devin Matthews Date: Fri Aug 13 14:48:00 2021 -0500 Merge pull request #530 from flame/fix_clang_warnings Clean up some warnings that show up on clang/OSX. commit e6d68bc4fd0981bea90d7f045779cacfe53f6ae8 Merge: 20a1c401 ec06b6a5 Author: Devin Matthews Date: Fri Aug 13 14:47:46 2021 -0500 Merge pull request #529 from flame/fix_make_check_dependencies Add dependency on the "flat" blis.h file for the BLIS and BLAS testuite objects. commit 1772db029e10e0075b5a59d3fb098487b1ad542a Author: Devin Matthews Date: Fri Aug 13 14:46:35 2021 -0500 Add row- and column-strides for A/B in obj_ukr_fn_t. commit 4f70eb7913ad3ded193870361b6da62b20ec3823 Author: Devin Matthews Date: Fri Aug 13 11:12:43 2021 -0500 Clean up some warnings that show up on clang/OSX. commit 3cddce1e2a021be6064b90af30022b99cbfea986 Author: Devin Matthews Date: Thu Aug 12 22:32:34 2021 -0500 Remove schema field on obj_t (redundant) and add new API functions. commit ec06b6a503a203fa0cdb23273af3c0e3afeae7fa Author: Devin Matthews Date: Thu Aug 12 19:27:31 2021 -0500 Add dependency on the "flat" blis.h file for the BLIS and BLAS testsuite objects. This fixes a bug where "make -j check" may fail after a change to one or more header files, or where testsuite code doesn't get properly recompiled after internal changes. commit 20a1c4014c999063e6bc1cfa605b152454c5cbf4 Author: Field G. Van Zee Date: Thu Aug 12 14:44:04 2021 -0500 Disabled sanity check in bli_pool_finalize(). Details: - Disabled a sanity check in bli_pool_finalize() that was meant to alert the user if a pool_t was being finalized while some blocks were still checked out. However, this is exactly the situation that might happen when a pool_t is re-initialized for a larger blocksize, and currently bli_pool_reinit() is implemeneted as _finalize() followed by _init(). So, this sanity check is not universally appropriate. Thanks to AMD-India for reporting this issue. commit e366665cd2b5ae8d7683f5ba2de345df0a41096f Author: Field G. Van Zee Date: Thu Aug 12 14:06:53 2021 -0500 Fixed stale API calls to membrk API in gemmlike. Details: - Updated stale calls to the bli_membrk API within the 'gemmlike' sandbox. This API is now called bli_pba (packed block allocator). Ideally, this forgotten update would have been included as part of 21911d6, which is when the branch where the membrk->pba changes was introduced was merged into 'master'. - Comment updates. commit e38ca28689f31c5e5bd2347704dc33042e5ea176 Author: RuQing Xu Date: Fri Aug 13 03:21:19 2021 +0900 Added Apple Firestorm (A14/M1) Subconfig - Use the same bulk kernel as Cortex-A53 / ThunderX2; - Larger block size; - Use gemmsup kernels for double precision. commit 3df0e9b653fbb1293cad93010273eea579e753d9 Author: RuQing Xu Date: Sat Jul 17 04:21:53 2021 +0900 Arm64 8x4 Kernel Use Less Regs commit 4e7e225057a05b9722ce65ddf75a9c31af9fbf36 Author: RuQing Xu Date: Wed Jun 9 15:46:36 2021 +0900 Armv8-A Supplimentary GEMMSUP Sizes for RD commit c792d506ba09530395c439051727631fd164f59a Author: RuQing Xu Date: Sat Jun 5 04:20:24 2021 +0900 Armv8-A Fix GEMMSUP-RD Kernels on GNU Asm Suffixed NEON opcode is not supported by GNU assembler commit ce4473520975c2c8790c82c65a69d75f8ad758ea Author: RuQing Xu Date: Sat Jun 5 04:08:14 2021 +0900 Armv8-A Adjust Types for PACKM Kernels GCC does not have full NEON intrinsics support. commit 8a32d19af85b61af92fcab1c316fb3be1a8d42ce Author: RuQing Xu Date: Sat Jun 5 03:31:30 2021 +0900 Armv8-A GEMMSUP-RD 6x8m Armv8-A now has a complete set of GEMMSUP kernels.. commit afd0fa6ad1889ed073f781c8aa8635f99e76b601 Author: RuQing Xu Date: Sat Jun 5 01:19:01 2021 +0900 Armv8-A GEMMSUP-RD 6x8n commit 3c5f7405148ab142dee565d00da331d95a7a07b9 Author: RuQing Xu Date: Fri Jun 4 21:50:51 2021 +0900 Armv8-A s/d Packing Kernels Fix Typo For GCC. commit 49b05df7929ec3abc0d27b475d2d406116fe2682 Author: RuQing Xu Date: Fri Jun 4 18:04:59 2021 +0900 Armv8-A Introduced s/d Packing Kernels Sizes according to the 2014 kernels. commit c3faf93168c3371ff48a2d40d597bdb27021cad4 Author: RuQing Xu Date: Thu Jun 3 23:09:05 2021 +0900 Armv8-A DGEMMSUP 6x8m Kernel Recommended kernels set: ... BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, TRUE, BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, TRUE, BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, TRUE, BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, TRUE, ... bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); ... commit 3efe707b5500954941061d4c2363d6ed41d17233 Author: RuQing Xu Date: Thu Jun 3 17:20:57 2021 +0900 Armv8-A DGEMMSUP Adjustments commit 8ed8f5e625de9b77a0f14883283effe79af01771 Author: RuQing Xu Date: Thu Jun 3 16:37:37 2021 +0900 Armv8-A Add More DGEMMSUP - Add 6x8 GEMMSUP. - Adjust prefetching. - Workaround for Clang's disability to handle reg clobbering. - Subproduct 6x8 row-major GEMM <- incomplete. commit a9ba79ea14de3b5a271e5970cb473d3c52e2fa5f Author: RuQing Xu Date: Wed Jun 2 15:04:29 2021 +0900 Armv8-A Add GEMMSUP 4x8n Kernel - Compile w/ both GCC & Clang. - Edge cases use ref-kernels. - Can give performance boost in some contexts. commit df40efe8fbfd399d76c6000ec03791a9b76ffbdf Author: RuQing Xu Date: Wed Jun 2 00:04:20 2021 +0900 Armv8-A Add Part of GEMMSUP 8x4m Kernel - Compile w/ both GCC & Clang - Only block part is implement. Edge cases WIP - Not Optimal kernel scheme. Should do 4x8 instead commit 66399992881316514f64d68ec9eb60a87d53f674 Author: RuQing Xu Date: Sat May 29 05:52:05 2021 +0900 Armv8A DGEMM 4x4 Kernel WIP. Slow Quite slow. commit a29c16394ccef02d29141c79b71fb408e20073e6 Author: RuQing Xu Date: Sat May 29 04:58:45 2021 +0900 Armv8-A Add 8x4 Kernel WIP Test result: a bit lower GFlOps than 6x8. commit 64a1f786d58001284aa4f7faf9fae17f0be7a018 Author: Devin Matthews Date: Wed Aug 11 17:53:12 2021 -0500 Implement proposed new function pointer fields for obj_t. The added fields: 1. `pack_t schema`: storing the pack schema on the object allows the macrokernel to act accordingly without side-channel information from the rntm_t and cntx_t. The pack schema and "pack_[ab]" fields could be removed from those structs. 2. `void* user_data`: this field can be used to store any sort of additional information provided by the user. The pointer is propagated to submatrix objects and copies, but is otherwise ignored by the framework and the default implementations of the following three fields. User-specified pack, kernel, or ukr functions can do whatever they want with the data, and the user is 100% responsible for allocating, assigning, and freeing this buffer. 3. `obj_pack_fn_t pack`: the function called when a matrix is packed. This functions receives the expected arguments, as well as a mdim_t and mem_t* as memory must be allocated inside this function, and behavior may differ based on which matrix is being backed (i.e. transposition for B). This could also be achieved by passing a desired pack schema, but this would require additional information to travel down the control tree. 4. `obj_ker_fn_t ker`: the function called when we get to the "second loop", or the macro-kernel. Behavior may depend on the pack schemas of the input matrices. The default implementation would perform the inner two loops around the ukr, and then call either the default ukr or a user-supplied one (next field). 5. `obj_ukr_fn_t ukr`: the function called by the default macrokernel. This would replace the various current "virtual" microkernels, and could also be used to supply user-defined behavior. Users could supply both a custom kernel (above) and microkernel, although the user-specified kernel does **not** necessarily have to call the ukr function specified on the obj_t. Note that no macros or functions for accessing these new fields have been defined yet. That is next once these are finalized. Addresses https://github.com/flame/blis/projects/1#card-62357687. commit a32257eeab2e9946e71546a05a1847a39341ec6b Author: Field G. Van Zee Date: Thu Aug 5 16:23:02 2021 -0500 Fixed bli_init.c compile-time error on OSX clang. Details: - Fixed a compile-time error in bli_init.c when compiling with OSX's clang. This error was introduced in 868b901, which introduced a post-declaration struct assignment where the RHS was a struct initialization expression (i.e. { ... }). This use of struct initializer expressions apparently works with gcc despite it not being strict C99. The fix included in this commit declares a temporary variable for the purposes of being initialized to the desired value, via the struct initializer, and then copies the temporary struct (via '=' struct assignment) to the persistent struct. Thanks to Devin Matthews for his help with this. commit c8728cfbd19ecde9d43af05829e00bcfe7d86eed Author: Field G. Van Zee Date: Thu Aug 5 15:17:09 2021 -0500 Fixed configure breakage on OSX clang. Details: - Accept either 'clang' or 'LLVM' in vendor string when greping for the version number (after determining that we're working with clang). Thanks to Devin Matthews for this fix. commit 868b90138e64c873c780d9df14150d2a370a7a42 Author: Field G. Van Zee Date: Wed Aug 4 18:31:01 2021 -0500 Fixed one-time use property of bli_init() (#525). Details: - Fixes a rather obvious bug that resulted in segmentation fault whenever the calling application tried to re-initialize BLIS after its first init/finalize cycle. The bug resulted from the fact that the bli_init.c APIs made no effort to allow bli_init() to be called subsequent times at all due to it, and bli_finalize(), being implemented in terms of pthread_once(). This has been fixed by resetting the pthread_once_t control variable for initialization at the end of bli_finalize_apis(), and by resetting the control variable for finalization at the end of bli_init_apis(). Thanks to @lschork2 for reporting this issue (#525), and to Minh Quan Ho and Devin Matthews for suggesting the chosen solution. - CREDITS file update. commit 8dba1e752c6846a85dea50907135bbc5cbc54ee5 Author: Field G. Van Zee Date: Tue Jul 27 12:38:24 2021 -0500 CREDITS file update. commit cc9206df667b7c710b57b190b8ad351176de53b8 Author: Field G. Van Zee Date: Fri Jul 16 15:48:37 2021 -0500 Added Graviton2 Neoverse N1 performance results. Details: - Added single-threaded and multithreaded performance results to docs/Performance.md. These results were gathered on a Graviton2 Neoverse N1 server. Special thanks to Nicholai Tukanov for collecting these results via the Arm-HPC/AWS hackaton. - Corrected what was supposed to be a temporary tweak to the legend labels in test/3/octave/plot_l3_perf.m. commit fab5c86d68137b59800715efb69214c0a7e458a7 Merge: 84f9dcd4 d073fc9a Author: Devin Matthews Date: Tue Jul 13 16:46:21 2021 -0500 Merge pull request #516 from nicholaiTukanov/p10-sandbox-rework P10 sandbox rework commit 84f9dcd449fa7a4cf4087fca8ec4ca0d10e9b801 Author: Devin Matthews Date: Tue Jul 13 16:45:44 2021 -0500 Remove unnecesary windows/zen2 directory. commit 21911d6ed3438ca4ba942d05851ba5d7e9835586 Merge: 17729cf4 689fa0f4 Author: Field G. Van Zee Date: Fri Jul 9 18:10:46 2021 -0500 Merge branch 'dev' commit 17729cf449919d1db9777cea5b65d2efc77e2692 Author: Devin Matthews Date: Fri Jul 9 14:59:48 2021 -0500 Add vzeroupper to Haswell microkernels. (#524) Details: - Added vzeroupper instruction to the end of all 'gemm' and 'gemmtrsm' microkernels so as to avoid a performance penalty when mixing AVX and SSE instructions. These vzeroupper instructions were once part of the haswell kernels, but were inadvertently removed during a source code shuffle some time ago when we were managing duplicate 'haswell' and 'zen' kernel sets. Thanks to Devin Matthews for tracking this down and re-inserting the missing instructions. commit c9a7f59aa84daa54d8f8c771f1f1ef2bd8730da2 Merge: 75f03907 9a8e649c Author: Devin Matthews Date: Thu Jul 8 14:00:38 2021 -0500 Merge pull request #522 from flame/windows-avx512 Fix Win64 AVX512 bug. commit 9a8e649c5ac89eba951bbee7136ca28aeb24d731 Author: Devin Matthews Date: Wed Jul 7 15:23:57 2021 -0500 Fix Win64 AVX512 bug. Use `-march=haswell` for kernels. Fixes #514. commit 75f03907c58385b656c8bd35d111db245814a9f3 Author: Devin Matthews Date: Wed Jul 7 15:44:11 2021 -0500 Add comment about make checkblas on Windows [ci skip] commit 4651583b1204a965e4aa672c7ad6de60f3ab1600 Merge: 69205ac2 174f7fc9 Author: Devin Matthews Date: Wed Jul 7 01:11:20 2021 -0500 Merge pull request #520 from flame/travis-ci-install Test installation in Travis CI commit 69205ac266947723ad4d7bb028b7521fe5c76991 Author: Field G. Van Zee Date: Tue Jul 6 20:39:22 2021 -0500 CREDITS file update. Details: - Thanks to Chengguo Sun for submitting #515 (5ef7f68). - Thanks to Andrew Wildman for submitting #519 (551c6b4). - Whitespace update to configure (spaces to tabs). commit 174f7fc9a11712c7bd1a61510bdc5c262b3e8e1f Author: Devin Matthews Date: Tue Jul 6 19:35:55 2021 -0500 Test installation in Travis CI commit 551c6b4ee8cd9dd2e1d1b46c8dde09eb50b91b2c Merge: 78eac6a0 f648df4e Author: Devin Matthews Date: Tue Jul 6 19:32:53 2021 -0500 Merge pull request #519 from awild82/oot_build_bugfix Fix installation from out-of-tree builds commit f648df4e5588f069b2db96f8be320ead0c1967ef Author: Andrew Wildman Date: Tue Jul 6 16:35:12 2021 -0700 Add symlink to blis.pc.in for out-of-tree builds commit 78eac6a0ab78c995c3f4e46a9e87388b5c3e1af6 Author: Devin Matthews Date: Tue Jul 6 11:05:43 2021 -0500 Revert "Always run `make check`." This reverts commit a201a53440c51244739aaee20e3309b50121cc68. commit a201a53440c51244739aaee20e3309b50121cc68 Author: Devin Matthews Date: Mon Jul 5 21:39:18 2021 -0500 Always run `make check`. I'm concerned that problems may lurk for `x86_64` builds on Windows which may be uncovered by a fuller `make check`. commit 5ef7f684dc75fc707c82f919e0836615f90a2627 Merge: aaa10c87 ad6231cc Author: Devin Matthews Date: Mon Jul 5 21:35:07 2021 -0500 Merge pull request #515 from chengguosun/bug-fix Fixed configure script bug. commit ad6231cca3fc1e477752ecd31b1ee2323398a642 Author: sunchengguo Date: Tue Jul 6 07:30:00 2021 -0400 Fixed configure script bug. Details: - Fixed kernel list string substitution error by adding function substitute_words in configure script. if the string contains zen and zen2, and zen need to be replaced with another string, then zen2 also be incorrectly replaced. commit d073fc9acac9d702556cab9fbbb3a253eeb1f998 Author: nicholaiTukanov Date: Fri Jul 2 19:54:33 2021 -0500 Update POWER10.md commit 907226c0af4afb6323b4e02be4f73f5fb89cddaf Author: nicholaiTukanov Date: Fri Jul 2 19:47:18 2021 -0500 Rework POWER10 sandbox - Add a testsuite for gathering performance (in GFLOPs) and measuring correctness for the POWER10 GEMM reduced precision/integer kernels. - Reworked GENERIC_GEMM template to hardcode the cache parameters. - Remove kernel wrapper that checked that only allowed matrices that weren't transposed or conjugated. However, the kernels still assume the matrices are not transposed. This wrapper was removed for performance reasons. - Renamed and restructured files and functions for clarity. - Editted the POWER10 document to reflect new changes. commit aaa10c87e19449674a4ca30fa3b6392bb22c3a66 Author: Field G. Van Zee Date: Mon Jun 21 17:53:52 2021 -0500 Skip clearing temp microtile in gemmlike sandbox. Details: - Removed code from gemmlike sandbox files bls_gemm_bp_var1.c and bls_gemm_bp_var2.c that initializes the elements of the temporary microtile to zero. This code, introduced recently in 7f7d726, did not actually fix any bug (despite that commit's log entry). The microtile does not need to be initialized because it is completely overwritten by a "beta = 0" invocation of gemm prior to it being read. Any NaNs or Infs present at the outset would have no impact on the output matrix C. Thanks to Devin Matthews for reminding me of this. commit bc10a3f2ff518360c32bea825b3eb62a9e4c8a77 Merge: bf727636 6548ceba Author: Devin Matthews Date: Fri Jun 18 19:01:08 2021 -0500 Merge pull request #492 from flame/thunderx2-clang Allow clang for ThunderX2 config commit bf727636632a368f3247dc8ab1d4b6119e9c511a Merge: e28f2a2d 5fc93e28 Author: Devin Matthews Date: Fri Jun 18 18:59:43 2021 -0500 Merge pull request #506 from xrq-phys/arm64-mac BLIS on Darwin_Aarch64 commit e28f2a2dfcff14e7094fce0b279b3a917b3ab98c Merge: d10e05bb 56ffca6a Author: Devin Matthews Date: Tue Jun 15 19:35:07 2021 -0500 Merge pull request #513 from nicholaiTukanov/asm_warning_p9_fix Fix assembler warning in POWER9 DGEMM commit 56ffca6a9bc67432a7894298739895f406e5f467 Author: nicholai Date: Tue Jun 15 18:17:39 2021 -0500 Fix asm warning commit 689fa0f40399bde1acc5367d6dd4e8fc4eb6f3ea Merge: b683d01b d10e05bb Author: Field G. Van Zee Date: Sun Jun 13 19:44:14 2021 -0500 Merge branch 'master' into dev commit d10e05bbd1ce45ce2c0dfe5c64daae2633357b3f Author: Field G. Van Zee Date: Sun Jun 13 19:36:16 2021 -0500 Sandbox header edits trigger full library rebuild. Details: - Adjusted the top-level Makefile so that any change to a sandbox header file will result in blis.h being regenerated along with a full recompilation of the library. Previously, sandbox files were omitted from the list of header files that, when touched, could trigger a full rebuild. Why was it like that previously? Because originally we only envisioned using sandboxes to *replace* gemm, not augment the library with new functionality. When replacing gemm, blis.h does not need to contain any local sandbox defintions in order for the user to be able to (indirectly) use that sandbox. But if you are adding functions to the library, those functions need to be prototyped so the compiler can perform type checking against the user's invocation of those new functions. Thanks to Jeff Diamond for helping us discover this deficiency in the build system. commit 7c3eb44efaa762088c190bb820ef6a3c87db8f65 Author: Devin Matthews Date: Wed Jun 2 11:28:22 2021 -0500 Add vhsubpd/vhsubpd. Horizontal subtraction instructions added to bli_x86_asm_macros.h, currently unused [ci skip]. commit 7f7d72610c25f511ba8cd2a53be7b59bdb80f3f3 Author: Field G. Van Zee Date: Mon May 31 16:50:18 2021 -0500 Fixed bugs in cpackm kernels, gemmlike code. Details: - Fixed intermittent bugs in bli_packm_haswell_asm_c3xk.c and bli_packm_haswell_asm_c8xk.c whereby the imaginary component of the kappa scalar was incorrectly loaded at an offset of 8 bytes (instead of 4 bytes) from the real component. This was almost certainly a copy- paste bug carried over from the corresonding zpackm kernels. Thanks to Devin Matthews for bringing this to my attention. - Added missing code to gemmlike sandbox files bls_gemm_bp_var1.c and bls_gemm_bp_var2.c that initializes the elements of the temporary microtile to zero. (This bug was never observed in output but rather noticed analytically. It probably would have also manifested as intermittent failures, this time involving edge cases.) - Minor commented-out/disabled changes to testsuite/src/test_gemm.c relating to debugging. commit 5fc93e280614b4a21a9cff36cf873b4b9407285b Author: RuQing Xu Date: Sat May 29 18:44:47 2021 +0900 Armv8A Rename Regs for Safe Darwin Compile Avoid x18 use in FP32 kernel: - C address lines x[18-26] renamed to x[19-27] (reg index +1) - Original role of x27 fulfilled by x5 which is free after k-loop pert. FP64 does not require changing since x18 is not used there. commit 9f4a4a3cfb2244e4024445e127dafd2a11f39fc5 Author: RuQing Xu Date: Sat May 29 17:21:28 2021 +0900 Armv8A Rename Regs for Clang Compile: FP32 Part Roughly the same as 916e1fa , additionally with x15 clobbering removed. - x15: Not used at all. Compilation w/ Clang shows warning about x18 reservation, but compilation itself is OK and all tests got passed. commit 916e1fa8be3cea0e3e2a4a7e8b00027ac2ee7780 Author: RuQing Xu Date: Sat May 29 16:46:52 2021 +0900 Armv8A Rename Regs for Clang Compile: FP64 Part - x7, x8: Used to store address for Alpha and Beta. As Alpha & Beta was not used in k-loops, use x0, x1 to load Alpha & Beta's addresses after k-loops are completed, since A & B's addresses are no longer needed there. This "ldr [addr]; -> ldr val, [addr]" would not cause much performance drawback since it is done outside k-loops and there are plenty of instructions between Alpha & Beta's loading and usage. - x9: Used to store cs_c. x9 is multiplied by 8 into x10 and not used any longer. Directly loading cs_c and into x10 and scale by 8 spares x9 straightforwardly. - x11, x12: Not used at all. Simply remove from clobber list. - x13: Alike x9, loaded and scaled by 8 into x14, except that x13 is also used in a conditional branch so that "cmp x13, #1" needs to be modified into "cmp x14, #8" to completely free x13. - x3, x4: Used to store next_a & next_b. Untouched in k-loops. Load these addresses into x0 and x1 after Alpha & Beta are both loaded, since then neigher address of A/B nor address of Alpha/Beta is needed. commit 7fabd896af773623ed01820a71bbff432e8a7d25 Author: RuQing Xu Date: Sat May 29 16:28:03 2021 +0900 Asm Flag Mingling for Darwin_Aarch64 Apple+Arm64 requires additional "tagging" of local symbols. commit 213dce32d2eed8b7a38c6a3f6112072b0a89ecd0 Author: Field G. Van Zee Date: Fri May 28 14:49:57 2021 -0500 Added a new 'gemmlike' sandbox. Details: - Added a new sandbox called 'gemmlike', which implements sequential and multithreaded gemm in the style of gemmsup but also unconditionally employs packing. The purpose of this sandbox is to (1) avoid select abstractions, such as objects and control trees, in order to allow readers to better understand how a real-world implementation of high-performance gemm can be constructed; (2) provide a starting point for expert users who wish to build something that is gemm-like without "reinventing the wheel." Thanks to Jeff Diamond, Tze Meng Low, Nicholai Tukanov, and Devangi Parikh for requesting and inspiring this work. - The functions defined in this sandbox currently use the "bls_" prefix instead of "bli_" in order to avoid any symbol collisions in the main library. - The sandbox contains two variants, each of which implements gemm via a block-panel algorithm. The only difference between the two is that variant 1 calls the microkernel directly while variant 2 calls the microkernel indirectly, via a function wrapper, which allows the edge case handling to be abstracted away from the classic five loops. - This sandbox implementation utilizes the conventional gemm microkernel (not the skinny/unpacked gemmsup kernels). - Updated some typos in the comments of a few files in the main framework. commit 82af05f54c34526a60fd2ec46656f13e1ac8f719 Author: Field G. Van Zee Date: Tue May 25 15:25:08 2021 -0500 Updated Fugaku (a64fx) performance results. Details: - Updated the performance graphs (pdfs and pngs) for the Fugaku/a64fx entry within Performance.md, and also updated the experiment details accordingly. Thanks to RuQing Xu for re-running the BLIS and SSL2 experiments reflected in this commit. - In Performance.md, added an English translation of the project name under which the Fugaku results were gathered, courtesy of RuQing Xu. commit e5c85da3763f73854ecd739ba3008bb467ed77c3 Merge: cbd8d393 5feb04e2 Author: Devin Matthews Date: Mon May 24 16:56:22 2021 -0500 Merge pull request #503 from flame/windows-compiler-check Add explicit compiler check for Windows. commit cbd8d3932599485727204479fded66ac19186db4 Merge: 6d4ab022 932dfe6a Author: Devin Matthews Date: Mon May 24 16:32:42 2021 -0500 Merge pull request #500 from xrq-phys/armsve+travis Upgrade Travis CI for Arm SVE commit 5feb04e233e1e6f81c727578ad9eae1367a2562f Author: Devin Matthews Date: Sun May 23 18:46:56 2021 -0500 Add explicit compiler check for Windows. Check the C compiler for a predefined macro `_WIN32` to indicate (cross-)compilation for Windows. Fixes #463. commit 6d4ab0223d9014ac2a66d66759536aa305be5867 Merge: 61584ded 859fb77a Author: Devin Matthews Date: Sun May 23 18:39:53 2021 -0500 Merge pull request #502 from flame/rm-rm-dupls Remove `rm-dupls` function in common.mk. commit 859fb77a320a3ace71d25a8885c23639b097a1b6 Author: Devin Matthews Date: Sun May 23 18:15:23 2021 -0500 Remove `rm-dupls` function in common.mk. AMD requested removal due to unclear licensing terms; original code was from stackoverflow. The function is unused but could easily be replaced by new implementation. commit 932dfe6abb9617223bd26a249e53447169033f8c Author: RuQing Xu Date: Thu May 20 02:07:31 2021 +0900 Travis CI Revert Unnecessary Extras from 91d3636 - Removed `V=1` in make line - Removed `CFLAGS` in configure line - Restored `pwd` surrounding OOT line commit bd156a210d347a073a6939cc4adab3d9256c2e2b Author: RuQing Xu Date: Sun May 16 02:56:14 2021 +0900 Adjust TravisCI - ArmSVE don't test gemmt (seems Qemu-only problem); - Clang use TravisCI-provided version instead of fixing to clang-8 due to that clang-8 seems conflicting with TravisCI's clang-7. commit 91d3636031021af3712d14c9fcb1eb34b6fe2a31 Author: RuQing Xu Date: Sat May 15 17:05:16 2021 +0900 Travis Support Arm SVE - Updated distro to 20.04 focal aarch64-gcc-10. This is minimal version required by aarch64-gcc-10. SVE intrinsics would not compile without GCC >=10. - x86 toolchains use official repo instead of ubuntu-toolchain-r/test. 20.04 focal is not supported by that PPA at the moment. - Add extra configuration-time options to .travis.yml. - Add Arm SVE entry to .travis.yml. commit 61584deddf9b3af6d11a811e6e04328d22390202 Author: RuQing Xu Date: Wed May 19 23:52:29 2021 +0900 Added 512b SVE-based a64fx subconfig + SVE kernels. Details: - Added 512-bit specific 'a64fx' subconfiguration that uses empirically tuned block size by Stepan Nassyr. This subconfig also sets the sector cache size and enables memory-tagging code in SVE gemm kernels. This subconfig utilizes (16, k) and (10, k) DPACKM kernels. - Added a vector-length agnostic 'armsve' subconfiguration that computes blocksizes according to the analytical model. This part is ported from Stepan Nassyr's repository. - Implemented vector-length-agnostic [d/s/sh] gemm kernels for Arm SVE at size (2*VL, 10). These kernels use unindexed FMLA instructions because indexed FMLA takes 2 FMA units in many implementations. PS: There are indexed-FLMA kernels in Stepan Nassyr's repository. - Implemented 512-bit SVE dpackm kernels with in-register transpose support for sizes (16, k) and (10, k). - Extended 256-bit SVE dpackm kernels by Linaro Ltd. to 512-bit for size (12, k). This dpackm kernel is not currently used by any subconfiguration. - Implemented several experimental dgemmsup kernels which would improve performance in a few cases. However, those dgemmsup kernels generally underperform hence they are not currently used in any subconfig. - Note: This commit squashes several commits submitted by RuQing Xu via PR #424. commit b683d01b9c4ea5f64c8031bda816beccfbf806a0 Author: Field G. Van Zee Date: Thu May 13 15:23:22 2021 -0500 Use extra #undef when including ba/ex API headers. Details: - Inserted a "#include bli_xapi_undef.h" after each usage of the basic and expert API macro setup headers: bli_oapi_ba.h, bli_oapi_ex.h, bli_tapi_ba.h, and bli_tapi_ex.h. This is functionally equivalent to the previous status quo, in which each header made minimal #undef prior to its own definitions and then a single instance of "#include bli_xapi_undef.h" cleaned up any remaining macro defs after all other headers were used. This commit will guarantee that macro defs from the setup of one header (say, bli_oapi_ex.h) don't "infect" the definitions made in a subsequent header. As with this previous commit, this change does not fix any issue but rather attempts to avoid creating orphaned macro definitions that are only needed within a very limited scope. - Removed minimal #undef from bli_?api_[ba|ex].h. - Removed old commented-out lines from bli_?api_[ba|ex].h. commit d4427a5b2f5cab5d2a64c58d87416628867c2b4a Author: Field G. Van Zee Date: Thu May 13 13:55:11 2021 -0500 Minor preprocessor/header cleanup. Details: - Added frame/include/bli_xapi_undef.h, which explicitly undefines all macros defined in bli_oapi_ba.h, bli_oapi_ex.h, bli_tapi_ba.h, and bli_tapi_ex.h. (This is for safety and good cpp coding practice, not because it fixes anything.) - Added #include "bli_xapi_undef.h" to bli_l1v.h, bli_l1d.h, bli_l1f.h, bli_l1m.h, bli_l2.h, bli_l3.h, and bli_util.h. - Comment updates to bli_oapi_ba.h, bli_oapi_ex.h, bli_tapi_ba.h, and bli_tapi_ex.h. - Moved frame/3/bli_l3_ft_ex.h to local 'old' directory after realizing that nothing in BLIS used those function pointer types. Also commented out the "#include bli_l3_ft_ex.h" directive in frame/3/bli_l3.h. commit 5aa63cd927b22a04e581b07d0b68ef391f4f9b1f Author: Field G. Van Zee Date: Wed May 12 19:53:35 2021 -0500 Fixed typo in cpp guard in bli_util_ft.h. Details: - Changed #ifdef BLIS_OAPI_BASIC to #ifdef BLIS_TAPI_BASIC in bli_util_ft.h. This typo was causing some types to be redefined when they weren't supposed to be. commit f0e8634775094584e89f1b03811ee192f2aaf67f Author: Field G. Van Zee Date: Wed May 12 18:45:32 2021 -0500 Defined eqsc, eqv, eqm to test object equality. Details: - Defined eqsc, eqv, and eqm operations, which set a bool depending on whether the two scalars, two vectors, or two matrix operands are equal (element-wise). eqsc and eqv support implicit conjugation and eqm supports diagonal offset, diag, uplo, and trans parameters (in a manner consistent with other level-1m operations). These operations are currently housed under frame/util, at least for now, because they are not computational in nature. - Redefined bli_obj_equals() in terms of eqsc, eqv, and eqm. - Documented eqsc, eqv, and eqm in BLISObjectAPI.md and BLISTypedAPI.md. Also: - Documented getsc and setsc in both docs. - Reordered entry for setijv in BLISTypedAPI.md, and added separator bars to both docs. - Added missing "Observed object properties" clauses to various levle-1v entries in BLISObjectAPI.md. - Defined bli_apply_trans() in bli_param_macro_defs.h. - Defined supporting _check() function, bli_l0_xxbsc_check(), in bli_l0_check.c for eqsc. - Programming style and whitespace updates to bli_l1m_unb_var1.c. - Whitespace updates to bli_l0_oapi.c, bli_l1m_oapi.c - Consolidated redundant macro redefinition for copym function pointer type in bli_l1m_ft.h. - Added macros to bli_oapi_ba.h, _ex.h, and bli_tapi_ba.h, _ex.h that allow oapi and tapi source files to forego defining certain expert functions. (Certain operations such as printv and printm do not need to have both basic expert interfaces. This also includes eqsc, eqv, and eqm.) commit 5d46dbee4a06ba5a422e19817836976f8574cb4f Author: Devin Matthews Date: Wed May 12 18:42:09 2021 -0500 Replace bli_dlamch with something less archaic (#498) Details: - Added new implementations of bli_slamch() and bli_dlamch() that use constants from the standard C library in lieu of dynamically-computed values (via code inherited from netlib). The previous implementation is still available when the cpp macro BLIS_ENABLE_LEGACY_LAMCH is defined by the subconfiguration at compile-time. Thanks to Devin Matthews for providing this patch, and to Stefano Zampini for reporting the issue (#497) that prompted Devin to propose the patch. commit 6a89c7d8f9ac3f51b5b4d8ccb2630d908d951e6f Author: Field G. Van Zee Date: Sat May 1 18:54:48 2021 -0500 Defined setijv, getijv to set/get vector elements. Details: - Defined getijv, setijv operations to get and set elements of a vector, in bli_setgetijv.c and .h. - Renamed bli_setgetij.c and .h to bli_setgetijm.c and .h, respectively. - Added additional bounds checking to getijm and setijm to prevent actions with negative indices. - Added documentation to BLISObjectAPI.md and BLISTypedAPI.md for getijv and setijv. - Added documentation to BLISTypedAPI.md for getijm and setijm, which were inadvertently missing. - Added a new entry to the FAQ titled "Why does BLIS have vector (level-1v) and matrix (level-1m) variations of most level-1 operations?" - Comment updates. commit 4534daffd13ed7a8983c681d3f5e9de17c9f0b96 Author: Field G. Van Zee Date: Tue Apr 27 18:16:44 2021 -0500 Minor API breakage in bli_pack API. Details: - Changed bli_pack_get_pack_a() and bli_pack_get_pack_b() so that instead of returning a bool, they set a bool that is passed in by address. This does break the public exported API, but I expect very few users actually use this function. (This change is being made in preparation for a much more extensive commit relating to error checking.) commit 6a4aa986ffc060d3e64ed230afe318b82630f8b2 Author: Field G. Van Zee Date: Fri Apr 23 13:10:01 2021 -0500 Fixed typo in Table of Contents. commit f6424b5b82160d346a09a0fbb526981ecf66cdb3 Author: Field G. Van Zee Date: Fri Apr 23 13:08:06 2021 -0500 Added dedicated Performance section to README.md. Details: - Spun off the Performance.md and PerformanceSmall.md links in the Documentation section into a new Performance section dedicated to those two links. (The previous entries remain redundantly listed within Documentation section.) Thanks to Robert van de Geijn for suggesting this change. commit 40ce5fd241b9ad140bf57278d440f0598d7f15d8 Merge: 6280757b 1f3461a5 Author: Devin Matthews Date: Wed Apr 21 09:54:25 2021 -0500 Merge pull request #493 from cassiersg/patch-1 Fix typo in FAQ.md commit 1f3461a5a5a88510f913451a93e3190ec1556f39 Author: Gaëtan Cassiers Date: Wed Apr 21 16:49:05 2021 +0200 Fix typo in FAQ.md commit 6548cebaf55a1f9bdb8417cc89dd0444d8f9c2e4 Author: Devin Matthews Date: Wed Apr 14 13:00:42 2021 -0500 Allow clang for ThunderX2 config Needed for compiling on e.g. Mac M1. AFAIK clang supports the same -mcpu flag for ThunderX2 as gcc. commit 6280757be32f90fd77d8dd9357b07d9306e6f80d Author: Field G. Van Zee Date: Wed Apr 7 13:03:56 2021 -0500 Minor updates to a64fx section of Performance.md. commit 1e6ed823c6cd11f9b671779f3c8bdbd2bbb40f34 Author: RuQing Xu Date: Thu Apr 8 02:59:26 2021 +0900 Additional A64fx Comments (#490) * Performance.md Update A64fx Comments - Reason for ARMPL's missing data; - Additional envs / flags for kernel selection; - Update BLIS SRC commit. * Include Another Fix in armsve-cfg-vendor A prototype was forgotten, causing that void* pointer was not fully returned. commit 2688f21a5b073950f6f187c95917fdbb5aac234a Author: Field G. Van Zee Date: Tue Apr 6 19:02:37 2021 -0500 Added Fujitsu A64fx (512-bit SVE) perf results. Details: - Added single-threaded and multithreaded performance results to docs/Performance.md. These results were gathered on the "Fugaku" Fujitsu A64fx supercomputer at the RIKEN Center for Computational Science in Kobe, Japan. Special thanks to RuQing Xu and Stepan Nassyr for their work in developing and optimizing A64fx support in BLIS and RuQing for gathering the performance data that is reflected in these new graphs. commit ba3ba8da83d48397162139e11337c036a631ba79 Author: Field G. Van Zee Date: Tue Apr 6 18:39:58 2021 -0500 Minor updates and fixes to test/3/octave scripts. Details: - Fixed an issue where the wrong string was being passed in for the vendor legend string. - Changed the graph in which the legends appear. - Updates to runthese.m. commit 09bd4f4f12311131938baa9f75d27e92b664d681 Author: Field G. Van Zee Date: Wed Mar 31 17:09:36 2021 -0500 Add err_t* "return" parameter to malloc functions. Details: - Added an err_t* parameter to memory allocation functions including bli_malloc_intl(), bli_calloc_intl(), bli_malloc_user(), bli_fmalloc_align(), and bli_fmalloc_noalign(). Since these functions already use the return value to return the allocated memory address, they can't communicate errors to the caller through the return value. This commit does not employ any error checking within these functions or their callers, but this sets up BLIS for a more comprehensive commit that moves in that direction. - Moved the typedefs for malloc_ft and free_ft from bli_malloc.h to bli_type_defs.h. This was done so that what remains of bli_malloc.h can be included after the definition of the err_t enum. (This ordering was needed because bli_malloc.h now contains function prototypes that use err_t.) - Defined bli_is_success() and bli_is_failure() static functions in bli_param_macro_defs.h. These functions provide easy checks for error codes and will be used more heavily in future commits. - Unfortunately, the additional err_t* argument discussed above breaks the API for bli_malloc_user(), which is an exported symbol in the shared library. However, it's quite possible that the only application that calls bli_malloc_user()--indeed, the reason it is was marked for symbol exporting to begin with--is the BLIS testsuite. And if that's the case, this breakage won't affect anyone. Nonetheless, the "major" part of the so_version file has been updated accordingly to 4.0.0. commit f9ad55ce7e12f59930605753959fcfd41a218d8d Merge: 04502492 90508192 Author: Field G. Van Zee Date: Wed Mar 31 14:20:19 2021 -0500 Merge branch 'master' into dev commit 90508192f2d6ae95adc2a3ba9f4e5bad2c8d6fd2 Author: Devin Matthews Date: Tue Mar 30 21:16:44 2021 -0500 Update do_sde.sh (#489) Update to a newer version of SDE, and do a direct download as it seems you don't have to click-through the license anymore. commit 22c6b5dc4c9cc21942f8ccc30891f9b4385a9504 Author: Nicholai Tukanov Date: Tue Mar 30 19:07:42 2021 -0500 Fixed bug in power10 microkernel I/O. (#488) Details: - Fixed a bug in the POWER10 DGEMM kernel whereby the microkernel did not store the microtile result correctly due to incorrect indices calculations. (The error was introduced when I reorganized the 'kernels/power10/3' directory.) commit 04502492671456b94bcdee60b9de347b6763a32d Author: Field G. Van Zee Date: Sun Mar 28 19:11:43 2021 -0500 Always stay initialized after BLAS compat calls. Details: - Removed the option to finalize BLIS after every BLAS call, which also means that BLIS would initialize at the beginning of every BLAS call. This option never really made sense and wasn't even implemented properly to begin with. (Because bli_init_auto() and _finalize_auto() were implemented in terms of bli_init_once() and _finalize_once(), respectively, the application would have only been able to call one BLAS routine before BLIS would find itself in a unusable, permanently uninitialized state.) Because this option was never meant for regular use, it never made it into configure as an actual configure-time option, and therefore this commit only removes parts of the code affected by the cpp macro guard BLIS_ENABLE_STAY_AUTO_INITIALIZED. commit 3a6f41afb8197e831b6ce2f1ae7f63735685fa0a Author: Field G. Van Zee Date: Sat Mar 27 17:22:14 2021 -0500 Renamed membrk files/vars/functions to pba. Details: - Renamed the files, variables, and functions relating to the packing block allocator from its legacy name (membrk) to its current name (pba). This more clearly contrasts the packing block allocator with the small block allocator (sba). - Fixed a typo in bli_pack_set_pack_b(), defined in bli_pack.c, that caused the function to erroneously change the value of the pack_a field of the global rntm_t instead of the pack_b field. (Apparently nobody has used this API yet.) - Comment updates. commit 36cb4116d15cfef2d42ec4a834efd4a958f261b5 Author: Field G. Van Zee Date: Sat Mar 27 15:15:09 2021 -0500 Switch allocator mutexes to static initialization. Details: - Switched the small block allocator (sba), as defined in bli_sba.c and bli_apool.c, to static initialization of its internal mutex. Did a similar thing for the packing block allocator (pba), which appears as global_membrk in bli_membrk.c. - Commented out bli_membrk_init_mutex() and bli_membrk_finalize_mutex() to ensure they won't be used in the future. - In bli_thrcomm_pthreads.c and .h, removed old, commented-out cpp blocks guarded by BLIS_USE_PTHREAD_MUTEX. commit 159ca6f01a5f91b93513134c9470b69ff78f5354 Author: Field G. Van Zee Date: Wed Mar 24 15:57:32 2021 -0500 Made test/3/octave scripts robust to missing data. Details: - Modified the octave scripts in test/3 so that the script does not choke when one or more of the expected OpenBLAS, Eigen, or vendor data files is missing. (The BLIS data set, however, must be complete.) When a file is missing, that data series is simply not included on that particular graph. Also factored out a lot of the redundant logic from plot_panel_4x5.m into a separate function in read_data.m. commit 545e6c2f6d09d023b353002a9a43b11aa0c1d701 Author: Field G. Van Zee Date: Mon Mar 22 17:42:33 2021 -0500 CHANGELOG update (0.8.1) commit 8535b3e11d2297854991c4272932ce4974dda629 Author: Field G. Van Zee Date: Mon Mar 22 17:42:33 2021 -0500 Version file update (0.8.1) commit e56d9f2d94ed247696dda2cbf94d2ca05c7fc089 Author: Field G. Van Zee Date: Mon Mar 22 17:40:50 2021 -0500 ReleaseNotes.md update in advance of next version. commit ca83f955d45814b7d84f53933cdb73323c0dea2c Author: Field G. Van Zee Date: Mon Mar 22 17:21:21 2021 -0500 CREDITS file update. commit 57ef61f6cdb86957f67212aa59407f2f8e7f3d1a Merge: bf1b578e e7a4a8ed Author: Field G. Van Zee Date: Fri Mar 19 13:05:43 2021 -0500 Merge branch 'master' of github.com:flame/blis commit bf1b578ea32ea1c9dbf7cb3586969e8ae89aa5ef Author: Field G. Van Zee Date: Fri Mar 19 13:03:17 2021 -0500 Reduced KC on skx from 384 to 256. Details: - Reduced the KC cache blocksize for double real on the skx subconfig from 384 to 256. The maximum (extended) KC was also reduced accordingly from 480 to 320. Thanks to Tze Meng Low for suggesting this change. commit e7a4a8edc940942357e8e4c4594383a29a962f93 Author: Nicholai Tukanov Date: Wed Mar 17 19:43:31 2021 -0500 Fix calculation of new pb size (#487) Details: - Added missing parentheses to the i8 and i4 instantiations of the GENERIC_GEMM macro in sandbox/power10/generic_gemm.c. commit 4493cf516e01aba82642a43abe350943ba458fe2 Author: Field G. Van Zee Date: Mon Mar 15 13:12:49 2021 -0500 Redefined BLIS_NUM_ARCHS to update automatically. Details: - Changed BLIS_NUM_ARCHS from a cpp macro definition to the last enum value in the arch_t enum. This means that it no longer needs to get updated manually whenever new subconfigurations are added to BLIS. Also removed the explicit initial index assigment of 0 from the first enum value, which was unnecessary due to how the C language standard mandates indexing of enum values. Thanks to Devin Matthews for originally submitting this as a PR in #446. - Updated docs/ConfigurationHowTo.md to reflect the aforementioned change. commit a4b73de84cdffcbe5cf71969a0f7f0f8202b3510 Author: Field G. Van Zee Date: Fri Mar 12 17:12:27 2021 -0600 Disabled _self() and _equal() in bli_pthread API. Details: - Disabled the _self() and _equal() extensions to the bli_pthread API introduced in d479654. These functions were disabled after I realized that they aren't actually needed yet. Thanks to Devin Matthews for helping me reason through the appropriate consumer code that will appear in BLIS (eventually) in a future commit. (Also, I could never get the Windows branch to link properly in clang builds in AppVeyor. See the comment I left in the code, and #485, for more info.) commit f9d604679d8715bc3e79a8630268446889b51388 Author: Field G. Van Zee Date: Thu Mar 11 16:57:55 2021 -0600 Added _self() and _equal() to bli_pthread API. Details: - Expanded the bli_pthread API to include equivalents to pthread_self() and pthread_equal(). Implemented these two functions for all three cpp branches present within bli_pthread.c: systemless, Windows, and Linux/BSD. commit fa9b3c8f6b3d5717f19832362104413e1a86dfb0 Author: Field G. Van Zee Date: Thu Mar 11 15:13:51 2021 -0600 Shuffled code in Windows branch of bli_pthreads.c. Details: - Reordered the definitions in the cpp branch in bli_pthreads.c that defines the bli_pthreads API in terms of Windows API calls. Also added missing comments that mark sections of the API, which brings the code into harmony with other cpp branches (as well as bli_pthread.h). commit 95d4f3934d806b3563f6648d57a4e381d747caf5 Author: Field G. Van Zee Date: Thu Mar 11 13:50:40 2021 -0600 Moved cpp macro redef of strerror_r to bli_env.c. Details: - Relocated the _MSC_VER-guarded cpp macro re-definition of strerror_r (in terms of strerror_s) from bli_thread.h to bli_env.c. It was likely left behind in bli_thread.h in a previous commit, when code that now resides in bli_env.c was moved from bli_thread.c. (I couldn't find any other instance of strerror_r being used in BLIS, so I moved the #define directly to bli_env.c rather than place it in bli_env.h.) The code that uses strerror_r is currently disabled, though, so this commit should have no affect on BLIS. commit 8a3066c315358d45d4f5b710c54594455f9e8fc6 Author: Field G. Van Zee Date: Tue Mar 9 17:52:59 2021 -0600 Relocated gemmsup_ref general stride handling. Details: - Moved the logic that checks for general stridedness in any of the matrix operands in a gemmsup problem. The logic previously resided near the top of bli_gemmsup_int(), which is the thread entry point for the parallel region of the current gemmsup implementation. The problem with this setup was that the code would attempt to reject problems with any general-strided operands by returning BLIS_FAILURE, and that return value was then being ignored by the l3_sup thread decorator, which unconditionally returns BLIS_SUCCESS. To solve this issue, rather than try to manage n return values, one from each of n threads, I simply moved the logic into bli_gemmsup_ref(). I didn't move it any higher (e.g. bli_gemmsup()) because I still want the logic to be part of the current gemmsup handler implementation. That is, perhaps someone else will create a different handler, and that author wants to handle general stride differently. (We don't want to force them into a particular way of handling general stride.) - Removed the general stride handling from bli_gemmtsup_int(), even though this function is inoperative for now. - This commit addresses issue #484. Thanks to RuQing Xu for reporting this issue. commit 670bc7b60f6065893e8ec1bebd2fc9e5ba710dff Author: Nicholai Tukanov Date: Fri Mar 5 13:53:43 2021 -0600 Add low-precision POWER10 gemm kernels (#467) Details: - This commit adds a new BLIS sandbox that (1) provides implementations based on low-precision gemm kernels, and (2) extends the BLIS typed API for those new implementations. Currently, these new kernels can only be used for the POWER10 microarchitecture; however, they may provide a template for developing similar kernels for other microarchitectures (even those beyond POWER), as changes would likely be limited to select places in the microkernel and possibly the packing routines. The new low-precision operations that are now supported include: shgemm, sbgemm, i16gemm, i8gemm, i4gemm. For more information, refer to the POWER10.md document that is included in 'sandbox/power10'. commit b8dcc5bc75a746807d6f8fa22dc2123c98396bf5 Author: RuQing Xu Date: Tue Mar 2 06:58:24 2021 +0800 Fixed typed API definition for gemmt (#476) Details: - Fixed incorrect definition and prototype of bli_?gemmt() in frame/3/bli_l3_tapi.c and .h, respectively. gemmt was previously defined identically to gemm, which was wrong because it did not take into account the uplo property of C. - Fixed incorrect API documentation for her2k/syr2k in BLISTypedAPI.md. Specifically, the document erroneously listed only a single transab parameter instead of transa and transb. commit a0e4fe2340a93521e1b1a835a96d0f26dec8406a Author: Ilknur Date: Tue Mar 2 02:06:56 2021 +0400 Fixed double free() in level1v example (#482) Details: - In exampls/tapi/00level1v.c, pointer 'z' was being freed twice and pointer 'a' was not being freed at all. This commit correctly frees each pointer exactly once. commit f5871c7e06a75799251d6b55a8a5fbfa1a92cf95 Author: Field G. Van Zee Date: Sun Feb 28 17:03:57 2021 -0600 Added complex asm packm kernels for 'haswell' set. Details: - Implemented assembly-based packm kernels for single- and double- precision complex domain (c and z) and housed them in the 'haswell' kernel set. This means c3xk, c8xk, z3xk, and z4xk are now all optimized. - Registered the aforementioned packm kernels in the haswell, zen, and zen2 subconfigs. - Minor modifications to the corresponding s and d packm kernels that were introduced in 426ad67. - Thanks to AMD, who originally contributed the double-precision real packm kernels (d6xk and d8xk), upon which these complex kernels are partially based. commit 426ad679f55264e381eb57a372632b774320fb85 Author: Field G. Van Zee Date: Sat Feb 27 18:39:56 2021 -0600 Added assembly packm kernels for 'haswell' set. Details: - Implemented assembly-based packm kernels for single- and double- precision real domain (s and d) and housed them in the 'haswell' kernel set. This means s6xk, s16xk, d6xk, and d8xk are now all optimized. - Registered the aforementioned packm kernels in the haswell, zen, and zen2 subconfigs. - Thanks to AMD, who originally contributed the double-precision real packm kernels (d6xk and d8xk), which I have now tweaked and used to create comparable single-precision real kernels (s6xk and s16xk). commit f50c1b7e5886d29efe134e1994d05af9949cd4b6 Merge: 8f39aea1 b3953b93 Author: Devin Matthews Date: Mon Feb 1 11:55:51 2021 -0600 Merge pull request #473 from ajaypanyala/pkgconfig build: generate pkgconfig file commit 8f39aea11f80a805b66cff4b4dc5e72727ea461d Merge: f8db9fb3 2a815d5b Author: Field G. Van Zee Date: Sat Jan 30 17:59:56 2021 -0600 Merge branch 'dev' commit f8db9fb33b48844d6b47fdef699625bd9197745a Author: Field G. Van Zee Date: Thu Jan 28 08:04:52 2021 -0600 Fixed missing parentheses in README.md Citations. commit b3953b938eee59f79b4a4162ba583a5cb59fa34e Author: Ajay Panyala Date: Tue Jan 12 17:07:04 2021 -0800 drop CFLAGS in the generated pkgconfig file commit b02d9376bac31c1a1c7916f44c4946277a1425e2 Author: Ajay Panyala Date: Mon Jan 11 20:50:01 2021 -0800 add datadir commit d8d8deeb6d8b84adb7ae5fdb88c6dd4f06624a76 Author: Ajay Panyala Date: Mon Jan 11 17:47:50 2021 -0800 generate pkgconfig file commit 8c65411c7c8737248a6f054ffa0ce008c95cb515 Merge: 328b4f88 874c3f04 Author: Devin Matthews Date: Mon Jan 11 16:01:45 2021 -0600 Merge pull request #471 from flame/fix-470 Fix kernel-to-config mapping for intel64 commit 874c3f04ece9af4d8fdf0e2713e21a259c117656 Author: Devin Matthews Date: Fri Jan 8 13:56:30 2021 -0600 Update configure Choose last sub-config in the kernel-to-config map if the config list doesn't contain the name of the kernel set. E.g. for "zen: skx knl haswell" pick "haswell" instead of "skx" which was chosen previously. Fixes #470. commit 2a815d5b365d934cb351b2f2a8cd1366e997b2e1 Author: Field G. Van Zee Date: Mon Jan 4 18:03:39 2021 -0600 Support trsm pre-inversion in 1m, bb, ref kernels. Details: - Expanded support for disabling trsm diagonal pre-inversion to other microkernel types, including the reference microkernel as well as the kernel implementations for 1m and the pre-broadcast B (bb) format used by the power9 subconfig. This builds on the 'haswell' and 'penryn' kernel support added in 7038bba. Thanks to Bhaskar Nallani for reminding me, in #461 (post-closure), that 1m support was missing from that commit. - Removed cpp branch of ref_kernels/3/bli_trsm_ref.c that contained the omp simd implementation after making a stripped-down copy in 'old'. This code has been disabled for some time and it seemed better suited to rot away out of sight rather than clutter up a file that is already cluttered by the presence of lower and upper versions. - Minor comment update to bli_ind_init(). commit c3ed2cbb9f60100fc9beb2a9d75476de9f711dc5 Author: Field G. Van Zee Date: Mon Jan 4 16:16:32 2021 -0600 Enable 1m only if real domain ukr is not reference. Details: - Previously, BLIS would automatically enable use of the 1m method for a given precision if the complex domain microkernel was a reference kernel. This commit adds an additional constraint so that 1m is only enabled if the corresponding real domain microkernel is NOT reference. That is, BLIS now forgos use of 1m if both the real and complex domain kernels are reference implementations. Note that this does not prevent 1m from being enabled manually under those conditions; it only means that 1m will not be enabled automatically at initialization-time. commit ed50c947385ba3b0b5d550015f38f7f0a31755c0 Merge: 0cef09aa 328b4f88 Author: Field G. Van Zee Date: Mon Jan 4 14:31:44 2021 -0600 Merge branch 'master' into dev commit 328b4f8872b4bca9a53d2de8c6e285f3eb13d196 Author: Devin Matthews Date: Wed Dec 30 17:54:18 2020 -0600 Shared object (dylib) was not built correctly for partial build. The SO build rule used $? instead of $^. Observed on macOS, not sure if it affected Linux or not. commit ae6ef66ef824da9bc6348bf9d1b588cd4f2ded9b Author: Devin Matthews Date: Wed Dec 30 17:34:55 2020 -0600 bli_diag_offset_with_trans had wrong return type. Fixes #468. commit ebcf197fb86fdd0a864ea928140752bc2462e8c6 Merge: 472f138c 21aa67e1 Author: Devin Matthews Date: Sat Dec 5 22:26:27 2020 -0600 Merge pull request #466 from isuruf/patch-3 fix cc_vendor for crosstool-ng toolchains commit 21aa67e11cebbc5a6dd7c6353154256294df3c33 Author: Isuru Fernando Date: Sat Dec 5 21:59:13 2020 -0600 fix cc_vendor for crosstool-ng toolchains commit 472f138cb927b7259126ebb9c68919cfcc7a4ea3 Author: Field G. Van Zee Date: Sat Dec 5 14:13:52 2020 -0600 Fixed typo in README.md to CodingConventions.md. commit 0cef09aa92208441a656bf097f197ea8e22b533b Author: Field G. Van Zee Date: Fri Dec 4 16:40:59 2020 -0600 Consolidated code in level-3 _front() functions. Details: - Reduced a code segment that appears in all of the bli_*_front() functions except for bli_gemm_front(). Previously, the code looked like this (taken from bli_herk_front()): if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &ah_local ); } This code segment is part of a sort-of-hack that allows us to communicate the pack schemas into the level-3 thread decorator, which needs them so that they can be passed into bli_l3_cntl_create_if(), where the control tree is created. However, the first conditional case above is unnecessary because the second case is fully generalized. That is, even in the native case, the context contains correct, queryable schemas. Thus, these code segments were reduced to something like: pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &ah_local ); There's always a small chance that the seemingly unnecessary code in the first branch case has some special use that is not apparent to me, but the testsuite's default input parameters seem to think this commit will be fine. commit 7038bbaa05484141195822291cf3ba88cbce4980 Author: Field G. Van Zee Date: Fri Dec 4 16:08:15 2020 -0600 Optionally disable trsm diagonal pre-inversion. Details: - Implemented a configure-time option, --disable-trsm-preinversion, that optionally disables the pre-inversion of diagonal elements of the triangular matrix in the trsm operation and instead uses division instructions within the gemmtrsm microkernels. Pre-inversion is enabled by default. When it is disabled, performance may suffer slightly, but numerical robustness should improve for certain pathological cases involving denormal (subnormal) numbers that would otherwise result in overflow in the pre-inverted value. Thanks to Bhaskar Nallani for reporting this issue via #461. - Added preprocessor macro guards to bli_trsm_cntl.c as well as the gemmtrsm microkernels for 'haswell' and 'penryn' kernel sets pursuant to the aforementioned feature. - Added macros to frame/include/bli_x86_asm_macros.h related to division instructions. commit 78aee79452cce2691c40f05b3632bdfc122300af Author: Field G. Van Zee Date: Wed Dec 2 13:02:36 2020 -0600 Allow amaxv testsuite module to run with dim = 0. Details: - Exit early from libblis_test_amaxv_check() when the vector dimension (length) of x is 0. This allows the module to run when the testsuite driver passes in a problem size of 0. Thanks to Meghana Vankadari for alerting us to this issue via #459. - Note: All other testsuite modules appear to work with problem sizes of 0, except for the microkernel modules. I chose not to "fix" those modules because a failure (or segmentation fault, as happens in this case) is actually meaningful in that it alerts the developer that some microkernels cannot be used with k = 0. Specifically, the 'haswell' kernel set contains microkernels that preload elements of B. Those microkernels would need to be restructured to avoid preloading in order to support usage when k = 0. commit 92d2b12a44ee0990c22735472aeaf1c17deb2d9b Author: Field G. Van Zee Date: Wed Dec 2 13:02:00 2020 -0600 Fixed obscure testsuite gemmt dependency bug. Details: - Fixed a bug in the gemmt testsuite module that only manifested when testing of gemmt is enabled but testing of gemv is disabled. The bug was due to a copy-paste error dating back to the introduction of gemmt in 88ad841. commit b43dae9a5d2f078c9bbe07079031d6c00a68b7de Author: Field G. Van Zee Date: Tue Dec 1 16:44:38 2020 -0600 Fixed copy-paste bugs in edge-case sup kernels. Details: - Fixed bugs in two sup kernels, bli_dgemmsup_rv_haswell_asm_1x6() and bli_dgemmsup_rd_haswell_asm_1x4(), which involved extraneous assembly instructions that were left over from when the kernels were first written. These instructions would cause segmentation faults in some situations where extra memory was not allocated beyond the end of the matrix buffers. Thanks to Kiran Varaganti for reporting these bugs and to Bhaskar Nallani for identifying the cause and solution. commit 11dfc176a3c422729f453f6c23204cf023e9954d Author: Field G. Van Zee Date: Tue Dec 1 19:51:27 2020 +0000 Reorganized thread auto-factorization logic. Details: - Reorganized logic of bli_thread_partition_2x2() so that the primary guts were factored out into "fast" and "slow" variants. Then added logic to the "fast" variant that allows for more optimal thread factorizations in some situations where there is at least one factor of 2. - Changed BLIS_THREAD_RATIO_M from 2 to 1 in bli_kernel_macro_defs.h and added comments to that file describing BLIS_THREAD_RATIO_? and BLIS_THREAD_MAX_?R. - In bli_family_zen.h and bli_family_zen2.h, preprocessed out several macros not used in vanilla BLIS and removed the unused macro BLIS_ENABLE_ZEN_BLOCK_SIZES from the former file. - Disabled AMD's small matrix handling entry points in bli_syrk_front.c and bli_trsm_front.c. (These branches of small matrix handling have not been reviewed by vanilla BLIS developers.) - Added commented-out calls printf() to bli_rntm.c. - Whitespace changes to bli_thread.c. commit 6d3bafacd7aa7ad198762b39490876c172bfbbcb Author: Devin Matthews Date: Sat Nov 28 17:17:56 2020 -0600 Update BuildSystem.md Add git version >= 1.8.5 requirement (see #462). commit 64856ea5a61b01d585750815788b6a775f729647 Author: Field G. Van Zee Date: Mon Nov 23 16:54:51 2020 -0600 Auto-reduce (by default) prime numbers of threads. Details: - When requesting multithreaded parallelism by specifying the total number of threads (whether it be via environment variable, globally at runtime, or locally at runtime), reduce the number of threads actually used by one if the original value (a) is prime and (b) exceeds a minimum threshold defined by the macro BLIS_NT_MAX_PRIME, which is set to 11 by default. If, when specifying the total number of threads (and not the individual ways of parallelism for each loop), prime numbers of threads are desired, this feature may be overridden by defining the BLIS_ENABLE_AUTO_PRIME_NUM_THREADS macro in the bli_family_*.h that corresponds to the configuration family targeted at configure-time. (For now, there is no configure option(s) to control this feature.) Thanks to Jeff Diamond for suggesting this change. - Defined a new function in bli_thread.c, bli_is_prime(), that returns a bool that determines whether an integer is prime. This function is implemented in terms of existing functions in bli_thread.c. - Updated docs/Multithreading.md to document the above feature, along with unrelated minor edits. commit 55933b6ff6b9b8a12041715f42bba06273d84b74 Author: Field G. Van Zee Date: Fri Nov 20 10:39:32 2020 -0600 Added missing attribution to docs/ReleaseNotes.md. commit e310f57b4b29fbfee479e0f9fe2040851efdec4f Author: Field G. Van Zee Date: Thu Nov 19 13:33:37 2020 -0600 CHANGELOG update (0.8.0) commit 9b387f6d5a010969727ec583c0cdd067a5274ed8 Author: Field G. Van Zee Date: Thu Nov 19 13:33:37 2020 -0600 Version file update (0.8.0) commit 2928ec750d3a3e1e5d55de5b57ddc04e9d0bd796 Author: Field G. Van Zee Date: Wed Nov 18 18:31:35 2020 -0600 ReleaseNotes.md update in advance of next version. Details: - Updated docs/ReleaseNotes.md in preparation for next version. commit b9899bedff6854639468daa7a973bb14ca131a74 Author: Field G. Van Zee Date: Wed Nov 18 16:52:41 2020 -0600 CREDITS file update. commit 9bb23e6c2a44b77292a72093938ab1ee6e6cc26a Author: Field G. Van Zee Date: Mon Nov 16 15:55:45 2020 -0600 Added support for systemless build (no pthreads). Details: - Added a configure option, --[enable|disable]-system, which determines whether the modest operating system dependencies in BLIS are included. The most notable example of this on Linux and BSD/OSX is the use of POSIX threads to ensure thread safety for when application-level threads call BLIS. When --disable-system is given, the bli_pthreads implementation is dummied out entirely, allowing the calling code within BLIS to remain unchanged. Why would anyone want to build BLIS like this? The motivating example was submitted via #454 in which a user wanted to build BLIS for a simulator such as gem5 where thread safety may not be a concern (and where the operating system is largely absent anyway). Thanks to Stepan Nassyr for suggesting this feature. - Another, more minor side effect of the --disable-system option is that the implementation of bli_clock() unconditionally returns 0.0 instead of the time elapsed since some fixed point in the past. The reasoning for this is that if the operating system is truly minimal, the system function call upon which bli_clock() would normally be implemented (e.g. clock_gettime()) may not be available. - Refactored preprocess-guarded code in bli_pthread.c and bli_pthread.h to remove redundancies. - Removed old comments and commented #include of "bli_pthread_wrap.h" from bli_system.h. - Documented bli_clock() and bli_clock_min_diff() in BLISObjectAPI.md and BLISTypedAPI.md, with a note that both are non-functional when BLIS is configured with --disable-system. commit 88ad84143414644df4c56733b1cf91a36bfacaf8 Author: Field G. Van Zee Date: Sat Nov 14 09:39:48 2020 -0600 Squash-merge 'pr' into 'squash'. (#457) Merged contributions from AMD's AOCL BLIS (#448). Details: - Added support for level-3 operation gemmt, which performs a gemm on only the lower or upper triangle of a square matrix C. For now, only the conventional/large code path will be supported (in vanilla BLIS). This was accomplished by leveraging the existing variant logic for herk. However, some of the infrastructure to support a gemmtsup is included in this commit, including - A bli_gemmtsup() front-end, similar to bli_gemmsup(). - A bli_gemmtsup_ref() reference handler function. - A bli_gemmtsup_int() variant chooser function (with variant calls commented out). - Added support for inducing complex domain gemmt via the 1m method. - Added gemmt APIs to the BLAS and CBLAS compatiblity layers. - Added gemmt test module to testsuite. - Added standalone gemmt test driver to 'test' directory. - Documented gemmt APIs in BLISObjectAPI.md and BLISTypedAPI.md. - Added a C++ template header (blis.hh) containing a BLAS-inspired wrapper to a set of polymorphic CBLAS-like function wrappers defined in another header (cblas.hh). These two headers are installed if running the 'install' target with INSTALL_HH is set to 'yes'. (Also added a set of unit tests that exercise blis.hh, although they are disabled for now because they aren't compatible with out-of-tree builds.) These files now live in the 'vendor' top-level directory. - Various updates to 'zen' and 'zen2' subconfigurations, particularly within the context initialization functions. - Added s and d copyv, setv, and swapv kernels to kernels/zen/1, and various minor updates to dotv and scalv kernels. Also added various sup kernels contributed by AMD to kernels/zen/3. However, these kernels are (for now) not yet used, in part because they caused AppVeyor clang failures, and also because I have not found time to review and vet them. - Output the python found during configure into the definition of PYTHON in build/config.mk (via build/config.mk.in). - Added early-return checks (A, B, or C with zero dimension; alpha = 0) to bli_gemm_front.c. - Implemented explicit beta = 0 handling in for the sgemm ukernel in bli_gemm_armv7a_int_d4x4.c, which was previously missing. This latent bug surfaced because the gemmt module verifies its computation using gemm with its beta parameter set to zero, which, on a cortexa15 system caused the gemm kernel code to unconditionally multiply the uninitialized C data by beta. The C matrix likely contained non-numeric values such as NaN, which then would have resulted in a false failure. - Fixed a bug whereby the implementation for bli_herk_determine_kc(), in bli_l3_blocksize.c, was inadvertantly being defined in terms of helper functions meant for trmm. This bug was probably harmless since the trmm code should have also done the right thing for herk. - Used cpp macros to neutralize the various AOCL_DTL_TRACE_ macros in kernels/zen/3/bli_gemm_small.c since those macros are not used in vanilla BLIS. - Added cpp guard to definition of bli_mem_clear() in bli_mem.h to accommodate C++'s stricter type checking. - Added cpp guard to test/*.c drivers that facilitate compilation on Windows systems. - Various whitespace changes. commit 234b8b0cf48f1ee965bd7999b291fc7add3b9a54 Author: Field G. Van Zee Date: Thu Nov 12 19:11:16 2020 -0600 Increased dotxaxpyf testsuite thresholds. Details: - Increased the test thresholds used by the dotxaxpyf testsuite module by a factor of five in order to avoid residuals that unnecessarily fall in the MARGINAL range. This commit should fix #455. Thanks to @nagsingh for reporting this issue. commit ed612dd82c50063cfd23576a6b2465213d31b14b Author: Field G. Van Zee Date: Sat Nov 7 13:09:42 2020 -0600 Updated README.md with sgemmsup blurb. Details: - Added an entry to the "What's New" section of the README.md to announce the availability of sgemmsup. commit e14424f55b15d67e8d18384aea45a11b9b772e02 Merge: 0cfe1aac eccdd75a Author: Field G. Van Zee Date: Sat Nov 7 13:02:50 2020 -0600 Merge branch 'dev' commit 0cfe1aac222008a78dff3ee03ef5183413936706 Author: Field G. Van Zee Date: Fri Oct 30 17:10:36 2020 -0500 Relocated operation index to ToC in API docs. Details: - Moved the "Operation index" section of both the BLISObjectAPI.md and BLISTypedAPI.md docs to appear immediately after the table of contents of each document. This allows the reader to quickly jump to the documentation for any operation without having to scroll through much of the document (when rendered via a web browser). - Fixed a mistake in the BLISObjectAPI.md for the setd operation, which does *not* observe the diag property of its matrix argument. Thanks to Jeff Diamond for reporting this. commit 2a0682f8e5998be536da313525292f0da6193147 Author: Field G. Van Zee Date: Sun Oct 18 18:04:03 2020 -0500 Implemented runtime subconfig selection (#451). Details: - Implemented support for the user manually overriding the automatic subconfiguration selection that happens at runtime. This override can be requested by setting the BLIS_ARCH_TYPE environment variable. The variable must be set to the arch_t id (as enumerated in bli_type_defs.h) corresponding to the desired subconfiguration. If a value outside this enumerated range is given, BLIS will abort with an error message. If the value is in the valid range but corresponds to a subconfiguration that was not activated at configure-time/compile-time, BLIS will abort with a (different) error message. Thanks to decandia50 for suggesting this feature via issue #451. - Defined a new function bli_gks_lookup_id to return the address of an internal data structure within the gks. If this address is NULL, then it indicates that the subconfig corresponding to the arch_t id passed into the function was not compiled into BLIS. This function is used in the second of the two abort scenarios described above. - Defined the enumerated error code BLIS_UNINITIALIZED_GKS_CNTX, which is returned for the latter of the two abort scenarios mentioned above, along with a corresponding error message and a function to perform the error check. - Added cpp macro branching to bli_env.c to support compilation of the auto-detect.x executable during configure-time. This cpp branch is similar to the cpp code already found in bli_arch.c and bli_cpuid.c. - Cleaned up the auto_detect() function to facilitate easier maintenance going forward. Also added a convenient debug switch that outputs the compilation command for the auto-detect.x executable and exits. commit eccdd75a2d8a0c46e91e94036179c49aa5fa601c Author: Field G. Van Zee Date: Fri Oct 9 15:44:16 2020 -0500 Whitespace tweak in docs/PerformanceSmall.md. commit 7677e9ba60ac27496e3421c2acc7c239e3f860e9 Merge: addcd46b a0849d39 Author: Field G. Van Zee Date: Fri Oct 9 15:41:25 2020 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit addcd46b0559d401aa7d33d4c7e6f63f5313a8e0 Author: Field G. Van Zee Date: Fri Oct 9 15:41:09 2020 -0500 Added Epyc 7742 Zen2 ("Rome") sup perf results. Details: - Added single-threaded and multithreaded sup performance results to docs/PerformanceSmall.md for both sgemm and dgemm. These results were gathered on an Epyc 7742 "Rome" server featuring AMD's Zen2 microarchitecture. Special thanks to Jeff Diamond for facilitating access to the system via the Oracle Cloud. - Updates to octave scripts in test/sup/octave for use with Octave 5.2 and for use with subplot_tight(). - Minor updates to octave scripts in test/3/octave. - Renamed files containing the previous Zen performance results for consistency with the new results. - Decreased line thickness slightly in large/conventional Zen2 graphs. I'm done tweaking those this time. Really. - Added missing line regarding eigen header installation for each microarchitecture section. commit a0849d390d04067b82af937cda8191b049b98915 Author: Field G. Van Zee Date: Fri Oct 9 20:22:17 2020 +0000 Register l3 sup kernels in zen2 subconfig. Details: - Registered full suite of sgemm and dgemm sup millikernels, blocksizes, and crossover thresholds in bli_cntx_init_zen2.c. - Minor updates to test/sup/runme.sh for running on Zen2 Epyc 7742 system. commit d98368c32d5fbfaab8966ee331d9bcb5c4fe7a59 Author: Field G. Van Zee Date: Thu Oct 8 19:05:51 2020 -0500 Another tweak to line thickness of Zen2 graphs. commit 1855dfbdaafa37892b36c97fd317fd5d8da76676 Author: Field G. Van Zee Date: Thu Oct 8 19:01:00 2020 -0500 Tweaked line thickness in Zen2 graphs once more. Details: - Decreased (relative to previous commit) line thickness in recent Zen2 graphs. commit 0991611e7ed82889c53a5c3f1ef1d49552c50d61 Author: Field G. Van Zee Date: Thu Oct 8 18:54:49 2020 -0500 Increased line thickness in recent Zen2 graphs. Details: - Increased the width of the lines in the graphs introduced in 74ec6b8. commit 8273cbacd7799e9af59e5320d66055f2f5d9cb31 Author: Field G. Van Zee Date: Wed Oct 7 14:51:33 2020 -0500 README.md, docs/FAQ.md updates. Details: - Added a frequently asked question to docs/FAQ.md regarding the difference between upstream (vanilla) BLIS and AMD BLIS. - Updated the name of ICES in the README.md to reflect the Oden rebranding. commit a178a822ad3d5021489a0e61f909d8550ae12a8f Author: Field G. Van Zee Date: Wed Sep 30 16:00:52 2020 -0500 Added Zen2 links to docs/Performance.md Contents. commit 74ec6b8f457cabe37d2382aaab35ba04fc737948 Author: Field G. Van Zee Date: Wed Sep 30 15:54:18 2020 -0500 Added Epyc 7742 Zen2 ("Rome") performance results. Details: - Added single-threaded and multithreaded performance results to docs/Performance.md. These results were gathered on an Epyc 7742 "Rome" server with AMD's Zen2 microarchitecture. Special thanks to Jeff Diamond for facilitating access to the system via the Oracle Cloud. - Renamed files containing the previous Zen performance results for consistency with the new results. commit bc4a213a2c3dcf8bbfcbb3a1ef3e9fc9e3226c34 Author: Field G. Van Zee Date: Wed Sep 30 15:28:20 2020 -0500 Updated matlab (now octave) plot code in test/3. Details: - Renamed test/3/matlab to test/3/octave. - Within test/3, updated and tuned plot_l3_perf.m and plot_panel_4x5.m files for use with octave (which is free and doesn't crash on me mid-way through my use of subplot). - Updated runthese.m scratchpad for zen2 invocations. - Added Nikolay S.'s subplot_tight() function, along with its license. commit c77ddc418187e1884fa6bcfe570eee295b9cb8bc Author: Field G. Van Zee Date: Wed Sep 30 20:15:43 2020 +0000 Added optional numactl usage to test/3/runme.sh. commit 2d8ec164e7ae4f0c461c27309dc1f5d1966eb003 Author: Nicholai Tukanov Date: Tue Sep 29 16:52:18 2020 -0500 Add POWER10 support to BLIS (#450) commit 4fd8d9fec2052257bf2a5c6e0d48ae619ff6c3e4 Author: Field G. Van Zee Date: Mon Sep 28 23:39:05 2020 +0000 Tweaked zen2 subconfig's MC cache blocksizes. Details: - Updated the MC cache blocksizes registered by the 'zen2' subconfig. - Minor updates to test/3/Makefile and test/3/runme.sh. commit 5efcdeffd58af621476d179afc0c19c0f912baa8 Author: Field G. Van Zee Date: Fri Sep 25 14:25:24 2020 -0500 More minor README.md updates. commit 9e940f8aad6f065ea1689e791b9a4e1fb7900c40 Author: Field G. Van Zee Date: Fri Sep 25 13:53:35 2020 -0500 Added 1m SISC bibtex to README.md. Details: - Added final citation info to 1m bibtex in README.md file. - Updated draft 1m paper link. - Changed some http to https. commit e293cae2d1b9067261f613f25eaa0e871356b317 Author: Field G. Van Zee Date: Tue Sep 15 16:09:11 2020 -0500 Implemented sgemmsup assembly kernels. Details: - Created a set of single-precision real millikernels and microkernels comparable to the dgemmsup kernels that already exist within BLIS. - Added prototypes for all kernels within bli_kernels_haswell.h. - Registered entry-point millikernels in bli_cntx_init_haswell.c and bli_cntx_init_zen.c. - Added sgemmsup support to the Makefile, runme.sh script, and source file in test/sup. This included edits that allow for separate "small" dimensions for single- and double-precision as well as for single- vs. multithreaded execution. commit 2765c6f37c11cb7f71cd4b81c64cea6130636c68 Author: Field G. Van Zee Date: Sat Sep 12 17:48:15 2020 -0500 Type saga continues; fixed sgemm ukernel signature. Details: - Changed double* pointers in sgemm function signature to float*. At this point I've lost track of whether this was my fault or another dormant bug like the one described in ece9f6a, but at this point I no longer care. It's one of those days (aka I didn't ask for this). commit 0779559509e0a1af077530d09ed151dac54f32ee Author: Field G. Van Zee Date: Sat Sep 12 17:37:21 2020 -0500 Fixed missing restrict in knl sgemm prototype. Details: - Added a missing 'restrict' qualifier in the sgemm ukernel prototype for knl. (Not sure how that code was ever compiling before now.) commit ece9f6a3ef1b26b53ecf968cd069df7a85b139fb Author: Field G. Van Zee Date: Sat Sep 12 17:22:42 2020 -0500 Fixed dormant type bugs in bli_kernels_knl.h. Details: - Fixed dormant type mismatches in the use of the prototype-generating macros in bli_kernels_knl.h. Specifically, some float prototypes were incorrectly using double as their ctype. This didn't actually matter until the type changes in 645d771, as previously those types were not used since packm was prototyped with void* pointers. commit 8ebb3b60e1c4c045ddb48e02de6e246cecde24a4 Author: Field G. Van Zee Date: Sat Sep 12 17:00:47 2020 -0500 Fixed accidental breakage in 645d771. Details: - In trying to clean up kappa_cast variables in the reference packm kernels, which I initally believed to be redundant given the other void* -> ctype* changes in 645d771, I accidentally ended up violating restrict semantics for 1e/1r packing and possibly other packm kernels. (Normally, my pre-commit testsuite run would have caught this, but I was unknowingly using an edited input.operations file in which I'd disabled most tests as part of unrelated work.) This commit reverts the kappa_cast changes in 645d771. commit 645d771a14ae89aa7131d6f8f4f4a8090329d05e Author: Field G. Van Zee Date: Sat Sep 12 15:31:56 2020 -0500 Minor packm kernel type cleanup (void* -> ctype*). Details: - Changed all void* function arguments in reference packm kernels to those of the native type (ctype*). These pointers no longer need to be void* and are better represented by their native types anyway. (See below for details.) Updated knl packm kernels accordingly. - In the definition of the PACKM_KER_PROT prototype macro template in frame/1m/bli_l1m_ker_prot.h, changed the pointer types for kappa, a, and p from void* to ctype*. They were originally void* because these function signatures had to share the same type so they could all be stored in a single array of that shared type, from which they were queried and called by packm_cxk(). This is no longer how the function pointers are stored, and so it no longer makes sense to force the caller of packm kernels to use void*, only so that the implementor of the packm kernels can typecast back to the native datatype within the kernel definition. This change has no effect internally within BLIS because currently all packm kernels are called after querying the function addresses from the context and then typecasting to the appropriate function pointer type, which is based upon type-specific function pointers like float* and double*. - Removed a comment in frame/1m/bli_l1m_ft_ker.h that was outdated and misleading due to changes to the handling of packm kernels since moving them into the context. commit 54bf6c35542a297e25bc8efec6067a6df80536f4 Author: Field G. Van Zee Date: Thu Sep 10 15:42:01 2020 -0500 Minor README.md update. Details: - Added a new entry to the "What people are saying about BLIS" section. commit e50b4d40462714ae33df284655a2faf7fa35f37c Author: Field G. Van Zee Date: Wed Sep 9 14:12:53 2020 -0500 Minor update to README.md (SIAM Best Paper Prize). commit a8efb72074691e2610372108becd88b4b392299e Merge: b0c4da17 97e87f2c Author: Devin Matthews Date: Mon Sep 7 16:18:19 2020 -0500 Merge pull request #434 from flame/intel-zdot Add an option to change the complex return type. commit 97e87f2c9f3878a05e1b7c6ec237ee88d9a72a42 Author: Field G. Van Zee Date: Mon Sep 7 15:56:42 2020 -0500 Whitespace/comment updates to #434 PR. commit b0c4da1732b6c6a9ff66f70c36e4722e0f9645ae Merge: 810e90ee b1b5870d Author: Devin Matthews Date: Mon Sep 7 15:47:54 2020 -0500 Merge pull request #436 from flame/s390x Add checks so that s390x is detected as 64-bit. commit 810e90ee806510c57504f0cf8eeaf608d38bd9dd Author: Field G. Van Zee Date: Tue Sep 1 16:11:40 2020 -0500 Minor README.md update. Details: - Added HPE to list of funders. - Changed http to https in funders' website links. commit 7d411282196e036991c26e52cb5e5f85769c8059 Author: Devin Matthews Date: Thu Aug 13 17:50:58 2020 -0500 Use -O2 for all framework code. (#435) It seems that -O3 might be causing intermittent problems with the f2c'ed packed and banded code. -O3 is retained for kernel code. Fixes #341 and fixes #342. commit 9c5b485d356367b0a1288761cd623f52036e7344 Author: Dave Love Date: Fri Aug 7 20:11:18 2020 +0000 Don't override -mcpu with -march on ARM (#353) * Use -mcpu for ARM See the GCC doc about -march, -mtune, and -mpu and maybe https://community.arm.com/developer/tools-software/tools/b/tools-software-ides-blog/posts/compiler-flags-across-architectures-march-mtune-and-mcpu * Fix typo in flags * Fix typo in cortexa9 flags * Modify cortexa53 compilation flags to fix failing BLAS check (#341) commit c253d14a72a746b670b3ffbb6e81bcafc73d1133 Author: Devin Matthews Date: Fri Aug 7 09:39:04 2020 -0500 Also handle Intel-style complex return in CBLAS interface. commit 5d653a11a0cc71305d0995507b1733995856f475 Author: Devin Matthews Date: Thu Aug 6 17:58:26 2020 -0500 Update Multithreading.md Addresses the issue raised in #426. commit b1b5870dd3f9b1c78cf5f58a53514d73f001fc4c Author: Devin Matthews Date: Thu Aug 6 17:34:20 2020 -0500 Add checks so that s390x is detected as 64-bit. commit 882dcb11bfc9ea50aa2f9044621833efd90d42be Author: Field G. Van Zee Date: Thu Aug 6 17:28:14 2020 -0500 Mention example code at top of documentation docs. Details: - Steer the reader towards the example code section of each documentation doc (object and typed). - Trivial update to examples/oapi/README, examples/tapi/README. commit f4894512e5bf56ff83701c07dd02972e300741a5 Author: Field G. Van Zee Date: Thu Aug 6 17:20:00 2020 -0500 Very minor updates to previous commit. commit adedb893ae8dfacd1dc54035979e15c44d589dbb Author: Field G. Van Zee Date: Thu Aug 6 17:14:01 2020 -0500 Documented mutator functions in BLISObjectAPI.md. Details: - Added documentation for commonly-used object mutator functions in BLISObjectAPI.md. Previously, only accessor functions were documented. Thanks to Jeff Diamond for pointing out this omission. - Explicitly set the 'diag' property of objects in oapi example modules (08level2.c and 09level3.c). commit 5b5278ff494888509543a79c09ea82089f6c95d9 Author: Devin Matthews Date: Thu Aug 6 14:19:37 2020 -0500 Use #ifdef instead of #if as macro may be undefined. commit 7fdc0fc893d0c6727b725ea842053b65be2c20ba Author: Devin Matthews Date: Thu Aug 6 14:03:55 2020 -0500 Add an option to change the complex return type. ifort apparently does not return complex numbers in registers as in C/C++ (or gfortran), but instead creates a "hidden" first parameter for the return value. The option --complex-return=gnu|intel has been added, as well as a guess based on a provided FC if not specified (otherwise default to gnu). This option affects the signatures of cdotc, cdotu, zdotc, and zdotu, and a single library cannot be used with both GNU and Intel Fortran compilers. Fixes #433. commit 6e522e5823b762d4be09b6acdca30faafba56758 Author: Field G. Van Zee Date: Thu Jul 30 19:31:37 2020 -0500 Mention disabling of sup in docs/Sandboxes.md. Details: - Added language to remind the reader to disable sup if the intended behavior is for the sandbox implementation to handle all problem sizes, even the smaller ones that would normally be handled by the sup code path. commit 00e14cb6d849e963a2e1ac35e7dbbe186af00a58 Author: Field G. Van Zee Date: Wed Jul 29 14:24:34 2020 -0500 Replaced use of bool_t type with C99 bool. Details: - Textually replaced nearly all non-comment instances of bool_t with the C99 bool type. A few remaining instances, such as those in the files bli_herk_x_ker_var2.c, bli_trmm_xx_ker_var2.c, and bli_trsm_xx_ker_var2.c, were promoted to dim_t since they were being used not for boolean purposes but to index into an array. - This commit constitutes the third phase of a transition toward using C99's bool instead of bool_t, which was raised in issue #420. The first phase, which cleaned up various typecasts in preparation for using bool as the basis for bool_t (instead of gint_t), was implemented by commit a69a4d7. The second phase, which redefined the bool_t typedef in terms of bool (from gint_t), was implemented by commit 2c554c2. commit 2c554c2fce885f965a425e727a0314d3ba66c06d Author: Field G. Van Zee Date: Fri Jul 24 15:57:19 2020 -0500 Redefined bool_t typedef in terms of C99 bool. Details: - Changed the typedef that defines bool_t from: typedef gint_t bool_t; where gint_t is a signed integer that forms the basis of most other integers in BLIS, to: typedef bool bool_t; - Changed BLIS's TRUE and FALSE macro definitions from being in terms of integer literals: #define TRUE 1 #define FALSE 0 to being in terms of C99 boolean constants: #define TRUE true #define FALSE false which are provided by stdbool.h. - This commit constitutes the second phase of a transition toward using C99's bool instead of bool_t, which will address issue #420. The first phase, which cleaned up various typecasts in preparation for using bool as the basis for bool_t (instead of gint_t), was implemented by commit a69a4d7. commit e01dd125581cec87f61e15590922de0dc938ec42 Author: Field G. Van Zee Date: Fri Jul 24 15:41:46 2020 -0500 Fail-safe updates to Makefiles in 'test' dir. Details: - Updated Makefiles in test, test/3, and test/sup so that running any of the usual targets without having first built BLIS results in a helpful error message. For example, if BLIS is not yet configured, make will output: Makefile:327: *** Cannot proceed: config.mk not detected! Run configure first. Stop. Similarly, if BLIS is configured but not yet built, make will output: Makefile:340: *** Cannot proceed: BLIS library not yet built! Run make first. Stop. In previous commits, these actions would result in a rather cryptic make error such as: make: *** No rule to make target 'test_sgemm_2400_asm_blis_st.x', needed by 'blis-nat-st'. Stop. commit b4f47f7540062da3463e2cb91083c12fdda0d30a Author: Devin Matthews Date: Fri Jul 24 13:56:13 2020 -0500 Add BLIS_EXPORT_BLIS to bli_abort. (#429) Fixes #428. commit a69a4d7e2f4607c919db30b14535234ce169c789 Author: Field G. Van Zee Date: Wed Jul 22 16:13:09 2020 -0500 Cleaned up bool_t usage and various typecasts. Details: - Fixed various typecasts in frame/base/bli_cntx.h frame/base/bli_mbool.h frame/base/bli_rntm.h frame/include/bli_misc_macro_defs.h frame/include/bli_obj_macro_defs.h frame/include/bli_param_macro_defs.h that were missing or being done improperly/incompletely. For example, many return values were being typecast as (bool_t)x && y rather than (bool_t)(x && y) Thankfully, none of these deficiencies had manifested as actual bugs at the time of this commit. - Changed the return type of bli_env_get_var() from dim_t to gint_t. This reflects the fact that bli_env_get_var() needs to be able to return a signed integer, and even though dim_t is currently defined as a signed integer, it does not intuitively appear to necessarily be signed by inspection (i.e., an integer named "dim_t" for matrix "dimension"). Also, updated use of bli_env_get_var() within bli_pack.c to reflect the changed return type. - Redefined type of thrcomm_t.barrier_sense field from bool_t to gint_t and added comments to the bli_thrcomm_*.h files that will explain a planned replacement of bool_t with C99's bool type. - Note: These changes are being made to facilitate the substitution of 'bool' for 'bool_t', which will eliminate the namespace conflict with arm_sve.h as reported in issue #420. This commit implements the first phase of that transition. Thanks to RuQing Xu for reporting this issue. - CREDITS file update. commit a6437a5c11d364c6c88af527294d29734d7cc7d6 Author: Field G. Van Zee Date: Mon Jul 20 19:21:07 2020 -0500 Replaced broken ref99 sandbox w/ simpler version. Details: - The 'ref99' sandbox was broken by multiple refactorings and internal API changes over the last two years. Rather than try to fix it, I've replaced it with a much simpler version based on var2 of gemmsup. Why not fix the previous implementation? It occurred to me that the old implementation was trying to be a lightly simplified duplication of what exists in the framework. Duplication aside, this sandbox would have worked fine if it had been completely independent of the framework code. The problem was that it was only partially independent, with many function calls calling a function in BLIS rather than a duplicated/simplified version within the sandbox. (And the reason I didn't make it fully independent to begin with was that it seemed unnecessarily duplicative at the time.) Maintaining two versions of the same implementation is problematic for obvious reasons, especially when it wasn't even done properly to begin with. This explains the reimplementation in this commit. The only catch is that the newer implementation is single-threaded only and does not perform any packing on either input matrix (A or B). Basically, it's only meant to be a simple placeholder that shows how you could plug in your own implementation. Thanks to Francisco Igual for reporting this brokenness. - Updated the three reference gemmsup kernels (defined in ref_kernels/3/bli_gemmsup_ref.c) so that they properly handle conjugation of conja and/or conjb. The general storage kernel, which is currently identical to the column-storage kernel, is used in the new ref99 sandbox to provide basic support for all datatypes (including scomplex and dcomplex). - Minor updates to docs/Sandboxes.md, including adding the threading and packing limitations to the Caveats section. - Fixed a comment typo in bli_l3_sup_var1n2m.c (upon which the new sandbox implementation is based). commit bca040be9da542dd9c75d91890fa7731841d733d Merge: 2605eb4d 171ecc1d Author: Devin Matthews Date: Mon Jul 20 09:27:30 2020 -0500 Merge pull request #425 from gmargari/patch-1 Update Multithreading.md commit 171ecc1dc6f055ea39da30e508f711b49a734359 Author: Giorgos Margaritis Date: Mon Jul 20 12:24:06 2020 +0300 Update Multithreading.md commit 2605eb4d99d3813c37a624c011aa2459324a6d89 Author: Field G. Van Zee Date: Wed Jul 15 15:25:19 2020 -0500 Added missing rv_d?x6 edge cases to sup kernel. Details: - Added support to bli_gemmsup_rv_haswell_asm_d6x8n.c for handling various n = 6 edge cases with a single sup kernel call. Previously, only n = {4,2,1} were handled explicitly as single kernel calls; that is, cases where n = 6 were previously being executed via two kernel calls (n = 4 and n = 2). - Added commented debug line to testsuite's test_libblis.c. commit 72f6ed0637dfcb021de04ac7d214d5c87e55d799 Author: Field G. Van Zee Date: Fri Jul 3 17:55:54 2020 -0500 Declare/define static functions via BLIS_INLINE. Details: - Updated all static function definitions to use the cpp macro BLIS_INLINE instead of the static keyword. This allows blis.h to use a different keyword (inline) to define these functions when compiling with C++, which might otherwise trigger "defined but not used" warning messages. Thanks to Giorgos Margaritis for reporting this issue and Devin Matthews for suggesting the fix. - Updated the following files, which are used by configure's hardware auto-detection facility, to unconditionally #define BLIS_INLINE to the static keyword (since we know BLIS will be compiled with C, not C++): build/detect/config/config_detect.c frame/base/bli_arch.c frame/base/bli_cpuid.c - CREDITS file update. commit 5fc701ac5f94c6300febbb2f24e731aa34f0f34a Author: Field G. Van Zee Date: Wed Jul 1 15:48:58 2020 -0500 Added -fomit-frame-pointer option to CKOPTFLAGS. Details: - Added the -fomit-frame-pointer compiler option to the CKOPTFLAGS variable in the following make_defs.mk files: config/haswell/make_defs.mk config/skx/make_defs.mk as well as comments that mention why the compiler option is needed. This option is needed to prevent the compiler from using the rbp frame register (in the very early portion of kernel code, typically where k_iter and k_left are defined and computed), which, as of 1c719c9, is used explicitly by the gemmsup millikernels. Thanks to Devin Matthews for identifying this missing option and to Jeff Diamond for reporting the original bug in #417. - The file config/zen/amd_config.mk which feeds into the make_defs.mk for both zen and zen2 subconfigs, was also touched, but only to add a commented-out compiler option (and the aforementioned explanatory comment) since that file already uses -fomit-frame-pointer in COPTFLAGS, which forms the basis of CKOPTFLAGS. commit 6af59b705782dada47e45df6634b479fe781d4fe Author: Field G. Van Zee Date: Wed Jul 1 14:54:23 2020 -0500 Fixed disabled edge case optimization in gemmsup. Details: - Fixed an inadvertently disabled edge case optimization in the two gemmsup variants in bli_l3_sup_var1n2m.c. Background: These edge case optimizations allow the last millikernel operation in the jr loop to be executed with inflated an register blocksize if it is the last (or only) iteration. For example, if mr=6 and nr=8 and the gemmsup problem is m=8, n=100, k=100. (In this case, the panel-block variant (var1n) is executed, which places the jr loop in the m dimension.) In principle, this problem could be executed as two millikernels: one with dimensions 6x100x100, and one as 2x100x100. However, with the support for inflated blocksizes in the kernel, the entire 8x100x100 problem can be passed to the millikernel function, which will then execute it more favorably as two 4x100x100 millikernel sub-calls. Now, this optimization is disabled under certain circumstances, such as when multithreading. Previously, the is_mt predicate was being set incorrectly such that it was non-zero even when running single-threaded. - Upon fixing the is_mt issue above, another bit of code needed to be moved so that the result of the optimization could have an impact on the assignment of loop bounds ranges to threads. commit b37634540fab0f9b8d4751b8356ee2e17c9e3b00 Author: Field G. Van Zee Date: Thu Jun 25 16:05:12 2020 -0500 Support ldims, packing in sup/test drivers. Details: - Updated the test/sup source file (test_gemm.c) and Makefile to support building matrices with small or large leading dimensions, and updated runme.sh to support executing both kinds of test drivers. - Updated runme.sh to allow for executing sup drivers with unpacked (the default) or packed matrices (via setting BLIS_PACK_A, BLIS_PACK_B environment variables), and for capturing output to files that encode both the leading dimension (small or large) and packing status into the filenames. - Consolidated octave scripts in test/sup/octave_st, test/sup/octave_mt into test/sup/octave and updated the octave code in that consolidated directory to read the new output filename format (encoding ldim and packing). Also added comments and streamlined code, particularly in plot_panel_trxsh.m. Tested the octave scripts with octave 5.2.0. - Moved old octave_st, octave_mt directories to test/sup/old. commit ceb9b95a96cc3844ecb43d9af48ab289584e76b6 Author: Field G. Van Zee Date: Thu Jun 18 17:15:25 2020 -0500 Fixed incorrect link to shiftd in BLISTypedAPI.md. Details: - Previously, the entry for shiftd in the Operation index section of BLISTypedAPI.md was incorrectly linking to the shiftd operation entry in BLISObjectAPI.md. This has been fixed. Thanks to Jeff Diamond for helping find this incorrect link. commit b3c42016818797f79e55b32c8b7d090f9d0aa0ea Author: Field G. Van Zee Date: Thu Jun 18 14:00:56 2020 -0500 CREDITS file update. commit 31af73c11abae03248d959da0f81eacea015b57a Author: Isuru Fernando Date: Thu Jun 18 13:35:54 2020 -0500 Expand windows instructions (#414) * Expand windows instructions * Windows: both static and shared don't work at the same time commit b5b604e106076028279e6d94dc0e51b8ad48e802 Author: Field G. Van Zee Date: Wed Jun 17 16:42:24 2020 -0500 Ensure random objects' 1-norms are non-zero. Details: - Fixed an innocuous bug that manifested when running the testsuite on extremely small matrices with randomization via the "powers of 2 in narrow precision range" option enabled. When the randomization function emits a perfect 0.0 to fill a 1x1 matrix, the testsuite will then compute 0.0/0.0 during the normalization process, which leads to NaN residuals. The solution entails smarter implementaions of randv, randnv, randm, and randnm, each of which will compute the 1-norm of the vector or matrix in question. If the object has a 1-norm of 0.0, the object is re-randomized until the 1-norm is not 0.0. Thanks to Kiran Varaganti for reporting this issue (#413). - Updated the implementation of randm_unb_var1() so that it loops over a call to the randv_unb_var1() implementation directly rather than calling it indirectly via randv(). This was done to avoid the overhead of multiple calls to norm1v() when randomizing the rows/columns of a matrix. - Updated comments. commit 35e38fb693e7cbf2f3d7e0505a63b2c05d3f158d Author: Isuru Fernando Date: Tue Jun 16 10:59:41 2020 -0500 FIx typo in FAQ commit 1c719c91a3ef0be29a918097652beef35647d4b2 Author: Field G. Van Zee Date: Thu Jun 4 17:21:08 2020 -0500 Bugfixes, cleanup of sup dgemm ukernels. Details: - Fixed a few not-really-bugs: - Previously, the d6x8m kernels were still prefetching the next upanel of A using MR*rs_a instead of ps_a (same for prefetching of next upanel of B in d6x8n kernels using NR*cs_b instead of ps_b). Given that the upanels might be packed, using ps_a or ps_b is the correct way to compute the prefetch address. - Fixed an obscure bug in the rd_d6x8m kernel that, by dumb luck, executed as intended even though it was based on a faulty pointer management. Basically, in the rd_d6x8m kernel, the pointer for B (stored in rdx) was loaded only once, outside of the jj loop, and in the second iteration its new position was calculated by incrementing rdx by the *absolute* offset (four columns), which happened to be the same as the relative offset (also four columns) that was needed. It worked only because that loop only executed twice. A similar issue was fixed in the rd_d6x8n kernels. - Various cleanups and additions, including: - Factored out the loading of rs_c into rdi in rd_d6x8[mn] kernels so that it is loaded only once outside of the loops rather than multiple times inside the loops. - Changed outer loop in rd kernels so that the jump/comparison and loop bounds more closely mimic what you'd see in higher-level source code. That is, something like: for( i = 0; i < 6; i+=3 ) rather than something like: for( i = 0; i <= 3; i+=3 ) - Switched row-based IO to use byte offsets instead of byte column strides (e.g. via rsi register), which were known to be 8 anyway since otherwise that conditional branch wouldn't have executed. - Cleaned up and homogenized prefetching a bit. - Updated the comments that show the before and after of the in-register transpositions. - Added comments to column-based IO cases to indicate which columns are being accessed/updated. - Added rbp register to clobber lists. - Removed some dead (commented out) code. - Fixed some copy-paste typos in comments in the rv_6x8n kernels. - Cleaned up whitespace (including leading ws -> tabs). - Moved edge case (non-milli) kernels to their own directory, d6x8, and split them into separate files based on the "NR" value of the kernels (Mx8, Mx4, Mx2, etc.). - Moved config-specific reference Mx1 kernels into their own file (e.g. bli_gemmsup_r_haswell_ref_dMx1.c) inside the d6x8 directory. - Added rd_dMx1 assembly kernels, which seems marginally faster than the corresponding reference kernels. - Updated comments in ref_kernels/bli_cntx_ref.c and changed to using the row-oriented reference kernels for all storage combos. commit 943a21def0bedc1732c0a2453afe7c90d7f62e95 Author: Isuru Fernando Date: Thu May 21 14:09:21 2020 -0500 Add build instructions for Windows (#404) commit fbef422f0d968df10e598668b427af230cfe07e8 Author: Field G. Van Zee Date: Thu May 21 10:30:41 2020 -0500 Separate OS X and Windows into separate FAQs. Details: - Separated the unified Mac OS X / Windows frequently asked question into two separate questions, one for each OS. commit 28be1a4265ea67e3f177c391aba3dbbcf840bd52 Author: Guodong Xu Date: Thu May 21 02:22:22 2020 +0800 avoid loading twice in armv8a gemm kernel (#403) This bug happens at a corner case, when k_iter == 0 and we jump to CONSIDERKLEFT. In current design, first row/col. of a and b are loaded twice. The fix is to rearrange a and b (first row/col.) loading instructions. Signed-off-by: Guodong Xu commit d51245e58b0beff2717156b980007c90337150d8 Author: Field G. Van Zee Date: Fri May 8 18:00:54 2020 -0500 Add support for Intel oneAPI in configure. Details: - Properly select cc_vendor based on the output of invoking CC with the --version option, including cases where CC is the variant of clang that is included with Intel oneAPI. (However, we continue to treat the compiler as clang for other purposes, not icc.) Thanks to Ajay Panyala and Devin Matthews for reporting on this issue via #402. commit 787adad73bd5eb65c12c39d732723a1ac0448748 Author: Field G. Van Zee Date: Fri May 8 16:18:20 2020 -0500 Defined netlib equivalent of xerbla_array(). Details: - Added a function definition for xerbla_array_(), which largely mirrors its netlib implementation. Thanks to Isuru Fernando for suggesting the addition of this function. commit c53b5153bee585685bf95ce22e058a7af72ecef0 Author: Field G. Van Zee Date: Tue May 5 12:39:12 2020 -0500 Documented Perl prerequisite for build system. Details: - Added Perl to list of prerequisites for building BLIS. This is in part (and perhaps completely?) due to some substitution commands used at the end of configure that include '\n' characters that are not properly interpreted by the version of sed included on some versions of OS X. This new documentation addresses issue #398. commit f032d5d4a6ed34c8c3e5ba1ed0b14d1956d0097c Author: Guodong Xu Date: Thu Apr 30 01:08:46 2020 +0800 New kernel set for Arm SVE using assembly (#396) Here adds two kernels for Arm SVE vector extensions. 1. a gemm kernel for double at sizes 8x8. 2. a packm kernel for double at dimension 8xk. To achive best performance, variable length agonostic programming is not used. Vector length (VL) of 256 bits is mandated in both kernels. Kernels to support other VLs can be added later. "SVE is a vector extension for AArch64 execution mode for the A64 instruction set of the Armv8 architecture. Unlike other SIMD architectures, SVE does not define the size of the vector registers, but constrains into a range of possible values, from a minimum of 128 bits up to a maximum of 2048 in 128-bit wide units. Therefore, any CPU vendor can implement the extension by choosing the vector register size that better suits the workloads the CPU is targeting. Instructions are provided specifically to query an implementation for its register size, to guarantee that the applications can run on different implementations of the ISA without the need to recompile the code." [1] [1] https://developer.arm.com/solutions/hpc/resources/hpc-white-papers/arm-scalable-vector-extensions-and-application-to-machine-learning Signed-off-by: Guodong Xu commit 4d87eb24e8e1f5a21e04586f6df4f427bae0091b Author: Yingbo Ma Date: Mon Apr 27 17:02:47 2020 -0400 Update KernelsHowTo.md (#395) commit 477ce91c5281df2bbfaddc4d86312fb8c8f879e2 Author: Field G. Van Zee Date: Wed Apr 22 14:26:49 2020 -0500 Moved #include "cpuid.h" to bli_cpuid.c. Details: - Relocated the #include "cpuid.h" directive from bli_cpuid.h to bli_cpuid.c. This was done because cpuid.h (which is pulled into the post-build blis.h developer header) doesn't protect its definitions with a preprocessor guard of the form: #ifndef FOOBAR_H #define FOOBAR_H // header contents. #endif and as a result, applications (previously) could not #include both blis.h and cpuid.h (since the former was already including the latter). Thanks to Bhaskar Nallani for raising this issue via #393 and to Devin Matthews for suggesting this fix. - CREDITS file update. commit 8bde63ffd7474a97c3a3b0b0dc1eae45be0ab889 Author: Field G. Van Zee Date: Sat Apr 18 12:50:12 2020 -0500 Adding missing conjy to her2/syr2 in typed API doc. Details: - Fixed a missing argument (conjy) in the function signatures of bli_?her2() and bli_?syr2() in docs/BLISTypedAPI.md. Thanks to Robert van de Geijn for reporting this omission. commit 976902406b610afdbacb2d80a7a2b4b43ff30321 Author: Field G. Van Zee Date: Fri Apr 17 15:11:10 2020 -0500 Disable packing by default in expert rntm_t init. Details: - Changed the behavior of bli_rntm_init() as well as the static initializer, BLIS_RNTM_INITIALIZER, so that user-initialized rntm_t objects by default specify the disabling of packing for A and B. Packing of A/B was already disabled by default when calling non-expert APIs (and enabled only when the user set environment variables BLIS_PACK_A or BLIS_PACK_B). With this commit, the default behavior of using user-initialized rntm_t objects with expert APIs comes into line with the default behavior of non-expert APIs--that is, they now both lead to the avoidance of packing in the sup code path. (Note: The conventional code path is unaffected by the environment variables BLIS_PACK_A/BLIS_PACK_B and/or the disabling of packing in a rntm_t object when calling an expert API.) This addresses issue #392. Thanks to Kiran Varaganti for bringing this inconsistency to our attention. - The above change was accomplished by changing the the definitions of static functions bli_rntm_clear_pack_a() and bli_rntm_clear_pack_b() in bli_rntm.h, which are both for internal use only. commit 5f2aee7c5fa5d562acaf8fbde3df0e2a04e1dd1b Author: Field G. Van Zee Date: Tue Apr 7 14:55:15 2020 -0500 README.md update to promote supmt dgemm. Details: - Updated the sup entry in the "What's New" section of the README.md file to promote the multithreaded dgemm sup feature introduced in c0558fd. commit f5923cd9ff5fbd91190277dea8e52027174a1d57 Author: Field G. Van Zee Date: Tue Apr 7 14:41:45 2020 -0500 CHANGELOG update (0.7.0) commit 68b88aca6692c75a9f686187e6c4a4e196ae60a9 Author: Field G. Van Zee Date: Tue Apr 7 14:41:44 2020 -0500 Version file update (0.7.0) commit b04de636c1702e4cb8e7ad82bab3cf43d2dbdfc6 Author: Field G. Van Zee Date: Tue Apr 7 14:37:43 2020 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated docs/ReleaseNotes.md in preparation for next version. commit 2cb604ba472049ad498df72d4a2dc47a161d4c3c Author: Field G. Van Zee Date: Mon Apr 6 16:42:14 2020 -0500 Rename more bli_thread_obarrier(), _obroadcast(). Details: - Renamed instances of bli_thread_obarrier() and bli_thread_obroadcast() that were made in the supmt-specific code commited to the 'amd' branch, which has now been merged with 'master'. Prior to the merge, 'master' received commit c01d249, which applied these renamings to the existing, non-sup codebase. commit efb12bc895de451067649d5dceb059b7827a025f Author: Field G. Van Zee Date: Mon Apr 6 15:01:53 2020 -0500 Minor updates/elaborations to RELEASING file. commit 2e3b3782cfb7a2fd0d1a325844983639756def7d Merge: 9f3a8d4d da0c086f Author: Field G. Van Zee Date: Mon Apr 6 14:55:35 2020 -0500 Merge branch 'master' into amd commit da0c086f4643772e111318f95a712831b0f981a8 Author: Satish Balay Date: Tue Mar 31 17:09:41 2020 -0500 OSX: specify the full path to the location of libblis.dylib (#390) * OSX: specify the full path to the location of libblis.dylib so that it can be found at runtime Before this change: Appication gives runtime error [when linked with blis] dyld: Library not loaded: libblis.3.dylib balay@kpro lib % otool -L libblis.dylib libblis.dylib: libblis.3.dylib (compatibility version 0.0.0, current version 0.0.0) /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1281.0.0) After this change: balay@kpro lib % otool -L libblis.dylib libblis.dylib: /Users/balay/petsc/arch-darwin-c-debug/lib/libblis.3.dylib (compatibility version 0.0.0, current version 0.0.0) /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1281.0.0) * INSTALL_LIBDIR -> libdir as INSTALL_LIBDIR has DESTDIR Co-Authored-By: Jed Brown * CREDITS file update. Co-authored-by: Jed Brown Co-authored-by: Field G. Van Zee commit 2bca03ea9d87c0da829031a5332545d05e352211 Author: Field G. Van Zee Date: Sat Mar 28 22:10:00 2020 +0000 Updates, tweaks to runme.sh in test/1m4m. Details: - Made several updates to test/1m4m/runme.sh, including: - Added missing handling for 1m and 4m1a implementations when setting the BLIS_??_NT environment variables. - Added support for using numactl to run the test executables. - Several other cleanups. commit c40a33190b94af5d5c201be63366594859b1233f Author: Field G. Van Zee Date: Thu Mar 26 16:55:00 2020 -0500 Warn user when auto-detection returns 'generic'. Details: - Added logic to configure that causes the script to output a warning to the user if/when "./configure auto" is run and the underlying hardware feature detection code is unable to identify the hardware. In these cases, the auto-detect code will return 'generic', which is likely not what the user expected, and a flag will be set so that a message is printed at the end of the configure output. (Thankfully, we don't expect this scenario to play out very often.) Thanks to Devin Matthews for suggesting this fix #384. commit 492a736fab5b9c882996ca024b64646877f22a89 Author: Devin Matthews Date: Tue Mar 24 17:28:47 2020 -0500 Fix vectorized version of bli_amaxv (#382) * Fix vectorized version of bli_amaxv To match Netlib, i?amax should return: - the lowest index among equal values - the first NaN if one is encountered * Fix typos. * And another one... * Update ref. amaxv kernel too. * Re-enabled optimized amaxv kernels. Details: - Re-enabled the optimized, intrinsics-based amaxv kernels in the 'zen' kernel set for use in haswell, zen, zen2, knl, and skx subconfigs. These two kernels (for s and d datatypes) were temporarily disabled in e186d71 as part of issue #380. However, the key missing semantic properties that prompted the disabling of these kernels--returning the index of the *first* rather than of the last element with largest absolute value, and returning the index of the first NaN if one is encountered--were added as part of #382 thanks to Devin Matthews. Thus, now that the kernels are working as expected once more, this commit causes these kernels to once again be registered for the affected subconfigs, which effectively reverts all code changes included in e186d71. - Whitespace/formatting updates to new macros in bli_amaxv_zen_int.c. Co-authored-by: Field G. Van Zee commit e186d7141a51f2d7196c580e24e7b7db8f209db9 Author: Field G. Van Zee Date: Sat Mar 21 18:40:36 2020 -0500 Disabled optimized amaxv kernels. Details: - Disabled use of optimized amaxv kernels, which use vector intrinsics for both 's' and 'd' datatypes. We disable these kernels because the current implementations fail to observe a semantic property of the BLAS i?amax_() subroutine, which is to return the index of the *first* element containing the maximum absolute value (that is, the first element if there exist two or more elements that contain the same value). With the optimized kernels disabled, the affected subconfigurations (haswell, zen, zen2, knl, and skx) will use the default reference implementations. Thanks to Mat Cross for reporting this issue via #380. - CREDITS file update. commit 9f3a8d4d851725436b617297231a417aa9ce8c6a Author: Field G. Van Zee Date: Sat Mar 14 17:48:43 2020 -0500 Added missing return to bli_thread_partition_2x2(). Details: - Added a missing return statement to the body of an early case handling branch in bli_thread_partition_2x2(). This bug only affected cases where n_threads < 4, and even then, the code meant to handle cases where n_threads >= 4 executes and does the right thing, albeit using more CPU cycles than needed. Nonetheless, thanks to Kiran Varaganti for reporting this bug via issue #377. - Whitespace changes to bli_thread.c (spaces -> tabs). commit 8c3d9b9eeb6f816ec8c32a944f632a5ad3637593 Merge: 71249fe8 0f9e0399 Author: Field G. Van Zee Date: Tue Mar 10 14:03:33 2020 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 71249fe8ddaa772616698f1e3814d40e012909ea Author: Field G. Van Zee Date: Tue Mar 10 13:55:29 2020 -0500 Merged test/sup, test/supmt into test/sup. Details: - Updated the Makefile, test_gemm.c, and runme.sh in test/sup to be able to compile and run both single-threaded and multithreaded experiments. This should help with maintenance going forward. - Created a test/sup/octave_st directory of scripts (based on the previous test/sup/octave scripts) as well as a test/sup/octave_mt directory (based on the previous test/supmt/octave scripts). The octave scripts are slightly different and not easily mergeable, and thus for now I'll maintain them separately. - Preserved the previous test/sup directory as test/sup/old/supst and the previous test/supmt directory as test/sup/old/supmt. commit 0f9e0399e16e96da2620faf2c0c3c21274bb2ebd Author: Field G. Van Zee Date: Thu Mar 5 17:03:21 2020 -0600 Updated sup performance graphs; added mt results. Details: - Reran all existing single-threaded performance experiments comparing BLIS sup to other implementations (including the conventional code path within BLIS), using the latest versions (where appropriate). - Added multithreaded results for the three existing hardware types showcased in docs/PerformanceSmall.md: Kaby Lake, Haswell, and Epyc (Zen1). - Various minor updates to the text in docs/PerformanceSmall.md. - Updates to the octave scripts in test/sup/octave, test/supmt/octave. commit 90db88e5729732628c1f3acc96eeefab49f2da41 Author: Field G. Van Zee Date: Mon Mar 2 15:06:48 2020 -0600 Updated sup[mt] Makefiles for variable dim ranges. Details: - Updated test/sup/Makefile and test/supmt/Makefile to allow specifying different problem size ranges for the drivers where one, two, or three matrix dimensions is large. This will facilitate the generation of more meaningful graphs, particularly when two dimensions are tiny. commit 31f11a06ea9501724feec0d2fc5e4644d7dd34fc Author: Field G. Van Zee Date: Thu Feb 27 14:33:20 2020 -0600 Updates to octave scripts in test/sup[mt]/octave. Details: - Optimized scripts in test/sup/octave and test/supmt/octave for use with octave 5.2.0 on Ubuntu 18.04. - Fixed stray 'end' keywords in gen_opsupnames.m and plot_l3sup_perf.m, which were not only unnecessary but also causing issues with versions 5.x. commit c01d249d7c546fe2e3cee3fe071cd4c4c88b9115 Author: Field G. Van Zee Date: Tue Feb 25 14:50:53 2020 -0600 Renamed bli_thread_obarrier(), _obroadcast(). Details: - Renamed two bli_thread_*() APIs: bli_thread_obarrier() -> bli_thread_barrier() bli_thread_obroadcast() -> bli_thread_broadcast() The 'o' was a leftover from when thrcomm_t objects tracked both "inner" and "outer" communicators. They have long since been simplified to only support the latter, and thus the 'o' is superfluous. commit f6e6bf73e695226c8b23fe7900da0e0ef37030c1 Author: Field G. Van Zee Date: Mon Feb 24 17:52:23 2020 -0600 List Gentoo under supported external packages. Details: - Add mention of Gentoo Linux under the list of external packages in the README.md file. Thanks to M. Zhou for maintaining this package. commit 9e5f7296ccf9b3f7b7041fe1df20b927cd0e914b Author: Field G. Van Zee Date: Tue Feb 18 15:16:03 2020 -0600 Skip building thrinfo_t tree when mt is disabled. Details: - Return early from bli_thrinfo_sup_grow() if the thrinfo_t object address is equal to either &BLIS_GEMM_SINGLE_THREADED or &BLIS_PACKM_SINGLE_THREADED. - Added preprocessor logic to bli_l3_sup_thread_decorator() in bli_l3_sup_decor_single.c that (by default) disables code that creates and frees the thrinfo_t tree and instead passes &BLIS_GEMM_SINGLE_THREADED as the thrinfo_t pointer into the sup implementation. - The net effect of the above changes is that a small amount of thrinfo_t overhead is avoided when running small/skinny dgemm problems when BLIS is compiled with multithreading disabled. commit 90081e6a64b5ccea9211bdef193c2d332c68492f Author: Field G. Van Zee Date: Mon Feb 17 14:57:25 2020 -0600 Fixed bug(s) in mt sup when single-threaded. Details: - Fixed a syntax bug in bli_l3_sup_decor_single.c as a result of changing function interface for the thread entry point function (of type l3supint_t). - Unfortunately, fixing the interface was not enough, as it caused a memory leak in the sba at bli_finalize() time. It turns out that, due to the new multithreading-capable variant code useing thrinfo_t objects--specifically, their calling of bli_thrinfo_grow()--we have to pass in a real thrinfo_t object rather than the global objects &BLIS_PACKM_SINGLE_THREADED or &BLIS_GEMM_SINGLE_THREADED. Thus, I inserted the appropriate logic from the OpenMP and pthreads versions so that single-threaded execution would work as intended with the newly upgraded variants. commit c0558fde4511557c8f08867b035ee57dd2669dc6 Author: Field G. Van Zee Date: Mon Feb 17 14:08:08 2020 -0600 Support multithreading within the sup framework. Details: - Added multithreading support to the sup framework (via either OpenMP or pthreads). Both variants 1n and 2m now have the appropriate threading infrastructure, including data partitioning logic, to parallelize computation. This support handles all four combinations of packing on matrices A and B (neither, A only, B only, or both). This implementation tries to be a little smarter when automatic threading is requested (e.g. via BLIS_NUM_THREADS) in that it will recalculate the factorization in units of micropanels (rather than using the raw dimensions) in bli_l3_sup_int.c, when the final problem shape is known and after threads have already been spawned. - Implemented bli_?packm_sup_var2(), which packs to conventional row- or column-stored matrices. (This is used for the rrc and crc storage cases.) Previously, copym was used, but that would no longer suffice because it could not be parallelized. - Minor reorganization of packing-related sup functions. Specifically, bli_packm_sup_init_mem_[ab]() are called from within packm_sup_[ab]() instead of from the variant functions. This has the effect of making the variant functions more readable. - Added additional bli_thrinfo_set_*() static functions to bli_thrinfo.h and inserted usage of these functions within bli_thrinfo_init(), which previously was accessing thrinfo_t fields via the -> operator. - Renamed bli_partition_2x2() to bli_thread_partition_2x2(). - Added an auto_factor field to the rntm_t struct in order to track whether automatic thread factorization was originally requested. - Added new test drivers in test/supmt that perform multithreaded sup tests, as well as appropriate octave/matlab scripts to plot the resulting output files. - Added additional language to docs/Multithreading.md to make it clear that specifying any BLIS_*_NT variable, even if it is set to 1, will be considered manual specification for the purposes of determining whether to auto-factorize via BLIS_NUM_THREADS. - Minor comment updates. commit d7a7679182d72a7eaecef4cd9b9a103ee0a7b42b Author: Field G. Van Zee Date: Fri Feb 7 17:37:03 2020 -0600 Fixed int-to-packbuf_t conversion error (C++ only). Details: - Fixed an error that manifests only when using C++ (specifically, modern versions of g++) to compile drivers in 'test' (and likely most other application code that #includes blis.h. Thanks to Ajay Panyala for reporting this issue (#374). commit d626112b8d5302f9585fb37a8e37849747a2a317 Author: Field G. Van Zee Date: Wed Jan 15 13:27:02 2020 -0600 Removed sorting on LDFLAGS in common.mk (#373). Details: - Removed a line of code in common.mk that passed LDFLAGS through the sort function. The purpose was not to sort the contents, but rather to remove duplicates. However, there is valid syntax in a string of linker flags that, when sorted, yields different/broken behavior. So I've removed the line in common.mk that sorts LDFLAGS. Also, for future use, I've added a new function, rm-dupls, that removes duplicates without sorting. (This function was based on code from a stackoverflow thread that is linked to in the comments for that code.) Thanks to Isuru Fernando for reporting this issue (#373). commit e67deb22aaeab5ed6794364520190936748ef272 Author: Field G. Van Zee Date: Tue Jan 14 16:01:34 2020 -0600 CHANGELOG update (0.6.1) commit 10949f528c5ffc5c3a2cad47fe16a802afb021be Author: Field G. Van Zee Date: Tue Jan 14 16:01:33 2020 -0600 Version file update (0.6.1) commit 5db8e710a2baff121cba9c63b61ca254a2ec097a Author: Field G. Van Zee Date: Tue Jan 14 15:59:59 2020 -0600 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. commit cde4d9d7a26eb51dcc5a59943361dfb8fda45dea Author: Field G. Van Zee Date: Tue Jan 14 15:19:25 2020 -0600 Removed 'attic/windows' (to prevent confusion). Details: - Finally removed 'attic/windows' and its contents. This directory once contained "proto" Windows support for BLIS, but we've since moved on to (thanks to Isuru Fernando) providing Windows DLL support via AppVeyor's build artifacts. Furthermore, since 'windows' was the only subdirectory within 'attic', the directory path would show up in GitHub's listing at https://github.com/flame/blis, which probably led to someone being confused about how BLIS provides Windows support. I assume (but don't know for sure) that nobody is using these files, so this is admittedly a case of shoot first and ask questions later. commit 7d3407d4681c6449f4bbb8ec681983700ab968f3 Author: Field G. Van Zee Date: Tue Jan 14 15:17:53 2020 -0600 CREDITS file update. commit f391b3e2e7d11a37300d4c8d3f6a584022a599f5 Author: Dave Love Date: Mon Jan 6 20:15:48 2020 +0000 Fix parsing in vpu_count on workstation SKX (#351) * Fix parsing in vpu_count on workstation SKX * Document Skylake-X as Haswell for single FMA * Update vpu_count for Skylake and Cascade Lake models * Support printing the configuration selected, controlled by the environment Intended particularly for diagnosing mis-selection of SKX through unknown, or incorrect, number of VPUs. * Move bli_log outside the cpp condition, and use it where intended * Add Fixme comment (Skylake D) * Mostly superficial edits to commits towards #351. Details: - Moved architecture/sub-config logging-related code from bli_cpuid.c to bli_arch.c, tweaked names, and added more set/get layering. - Tweaked log messages output from bli_cpuid_is_skx() in bli_cpuid.c. - Content, whitespace changes to new bullet in HardwareSupport.md that relates to single-VPU Skylake-Xs. * Fix comment typos Co-authored-by: Field G. Van Zee commit 5ca1a3cfc1c1cc4dd9da6a67aa072ed90f07e867 Author: Field G. Van Zee Date: Mon Jan 6 12:29:12 2020 -0600 Fixed 'configure' breakage introduced in 6433831. Details: - Added a missing 'fi' (endif) keyword to a conditional block added in the configure script in commit 6433831. commit e7431b4a834ef4f165c143f288585ce8e2272a23 Author: Field G. Van Zee Date: Mon Jan 6 12:01:41 2020 -0600 Updated 1m draft article link in README.md. commit 6433831cc3988ad205637ebdebcd6d8f7cfcf148 Author: Jeff Hammond Date: Fri Jan 3 17:52:49 2020 -0800 blacklist ICC 18 for knl/skx due to test failures Signed-off-by: Jeff Hammond commit af3589f1f98781e3a94a8f9cea8d5ea6f155f7d2 Author: Jeff Hammond Date: Fri Jan 3 13:23:24 2020 -0800 blacklist Intel 19+ Signed-off-by: Jeff Hammond commit 60de939debafb233e57fd4e804ef21b6de198caf Author: Jeff Hammond Date: Wed Jan 1 21:30:38 2020 -0800 fix link to docs the comment contains an incorrect link, which is trivially fixed here. @fgvanzee I hope you don't mind that I committed directly to master but this cannot break anything. commit 52711073789b6b84eb99bb0d6883f457ed3fcf80 Author: Field G. Van Zee Date: Mon Dec 16 16:30:26 2019 -0600 Fixed bugs in cblas_sdsdot(), sdsdot_(). Details: - Fixed a bug in sdsdot_sub() that redundantly added the "alpha" scalar, named 'sb'. This value was already being added by the underlying sdsdot_() function. Thus, we no longer add 'sb' within sdsdot_sub(). Thanks to Simon Lukas Märtens for reporting this bug via #367. - Fixed a second bug in order of typecasting intermediate products in sdsdot_(). Previously, the "alpha" scalar was being added after the "outer" typecast to float. However, the operation is supposed to first add the dot product to the (promoted) scalar and THEN downcast the sum to float. Thanks to Devin Matthews for catching this bug. commit fe2560a4b1d8ef8d0a446df6002b1e7decc826e9 Author: Field G. Van Zee Date: Fri Dec 6 17:12:44 2019 -0600 Annoted missing thread-related symbols for export. Details: - Added BLIS_EXPORT_BLIS annotation to function prototypes for bli_thrcomm_bcast() bli_thrcomm_barrier() bli_thread_range_sub() so that these functions are exported to shared libraries by default. This (hopefully) fixes issue #366. Thanks to Kyungmin Lee for reporting this bug. - CREDITS file update. commit 2853825234001af8f175ad47cef5d6ff9b7a5982 Merge: efa61a6c 61b1f0b0 Author: Field G. Van Zee Date: Fri Dec 6 16:06:46 2019 -0600 Merge branch 'master' into amd commit 61b1f0b0602faa978d9912fe58c6c952a33af0ac Author: Nicholai Tukanov Date: Wed Dec 4 14:18:47 2019 -0600 Add prototypes for POWER9 reference kernels (#365) Updates and fixes to power9 subconfig. Details: - Register s,c,z reference gemm and trsm ukernels that assume elements of B have been broadcast. - Added prototypes for level-3 ukernels that assume elements of B have been broadcast. Also added prototype for an spackm function that employs a duplication/broadcast factor of 4. - Register virtual gemmtrsm ukernels that work with broadcasting of B. - Disable right-side hemm, symm, trmm, and trmm3 in bli_family_power9.h. - Thanks to Nicholai Tukanov for providing these updates. commit efa61a6c8b1cfa48781fc2e4799ff32e1b7f8f77 Author: Field G. Van Zee Date: Fri Nov 29 16:17:04 2019 -0600 Added missing bli_l3_sup_thread_decorator() symbol. Details: - Defined dummy versions of bli_l3_sup_thread_decorator() for Openmp and pthreads so that those builds don't fail when performing shared library linking (especially for Windows DLLs via AppVeyor). For now, these dummy implementations of bli_l3_sup_thread_decorator() are merely carbon-copies of the implementation provided for single- threaded execution (ie: the one found in bli_l3_sup_decor_single.c). Thus, an OpenMP or pthreads build will be able to use the gemmsup code (including the new selective packing functionality), as it did before 39fa7136, even though it will not actually employ any multithreaded parallelism. commit 39fa7136f4a4e55ccd9796fb79ad5f121b872ad9 Author: Field G. Van Zee Date: Fri Nov 29 15:27:07 2019 -0600 Added support for selective packing to gemmsup. Details: - Implemented optional packing for A or B (or both) within the sup framework (which currently only supports gemm). The request for packing either matrix A or matrix B can be made via setting environment variables BLIS_PACK_A or BLIS_PACK_B (to any non-zero value; if set, zero means "disable packing"). It can also be made globally at runtime via bli_pack_set_pack_a() and bli_pack_set_pack_b() or with individual rntm_t objects via bli_rntm_set_pack_a() and bli_rntm_set_pack_b() if using the expert interface of either the BLIS typed or object APIs. (If using the BLAS API, environment variables are the only way to communicate the packing request.) - One caveat (for now) with the current implementation of selective packing is that any blocksize extension registered in the _cntx_init function (such as is currently used by haswell and zen subconfigs) will be ignored if the affected matrix is packed. The reason is simply that I didn't get around to implementing the necessary logic to pack a larger edge-case micropanel, though this is entirely possible and should be done in the future. - Spun off the variant-choosing portion of bli_gemmsup_ref() into bli_gemmsup_int(), in bli_l3_sup_int.c. - Added new files, bli_l3_sup_packm_a.c, bli_l3_sup_packm_b.c, along with corresponding headers, in which higher-level packm-related functions are defined for use within the sup framework. The actual packm variant code resides in bli_l3_sup_packm_var.c. - Pass the following new parameters into var1n and var2m: packa, packb bool_t's, pointer to a rntm_t, pointer to a cntl_t (which is for now always NULL), and pointer to a thrinfo_t* (which for nowis the address of the global single-threaded packm thread control node). - Added panel strides ps_a and ps_b to the auxinfo_t structure so that the millikernel can query the panel stride of the packed matrix and step through it accordingly. If the matrix isn't packed, the panel stride of interest for the given millikernel will be set to the appropriate value so that the mkernel may step through the unpacked matrix as it normally would. - Modified the rv_6x8m and rv_6x8n millikernels to read the appropriate panel strides (ps_a and ps_b, respectively) instead of computing them on the fly. - Spun off the environment variable getting and setting functions into a new file, bli_env.c (with a corresponding prototype header). These functions are now used by the threading infrastructure (e.g. BLIS_NUM_THREADS, BLIS_JC_NT, etc.) as well as the selective packing infrastructure (e.g. BLIS_PACK_A, BLIS_PACK_B). - Added a static initializer for mem_t objects, BLIS_MEM_INITIALIZER. - Added a static initializer for pblk_t objects, BLIS_PBLK_INITIALIZER, for use within the definition of BLIS_MEM_INITIALIZER. - Moved the global_rntm object to bli_rntm.c and extern it where needed. This means that the function bli_thread_init_rntm() was renamed to bli_rntm_init_from_global() and relocated accordingly. - Added a new bli_pack.c function, which serves as the home for functions that manage the pack_a and pack_b fields of the global rntm_t, including from environment variables, just as we have functions to manage the threading fields of the global rntm_t in bli_thread.c. - Reorganized naming for files in frame/thread, which mostly involved spinning off the bli_l3_thread_decorator() functions into their own files. This change makes more sense when considering the further addition of bli_l3_sup_thread_decorator() functions (for now limited only to the single-threaded form found in the _single.c file). - Explicitly initialize the reference sup handlers in both bli_cntx_init_haswell.c and bli_cntx_init_zen.c so that it's more obvious how to customize to a different handler, if desired. - Removed various snippets of disabled code. - Various comment updates. commit bbb21fd0a9be8c5644bec37c75f9396eeeb69e48 Author: Field G. Van Zee Date: Thu Nov 21 18:15:16 2019 -0600 Tweaked SIAM/SC Best Prize language in README.md. commit 043366f92d5f5f651d5e3371ac3adb36baf4adce Author: Field G. Van Zee Date: Thu Nov 21 18:13:51 2019 -0600 Fixed typo in previous commit (SIAM/SC prize). commit 05a4d583e65a46ff2a1100ab4433975d905d91f9 Author: Field G. Van Zee Date: Thu Nov 21 18:12:24 2019 -0600 Added SIAM/SC prize to "What's New" in README.md. commit 881b05ecd40c7bc0422d3479a02a28b1cb48383f Author: Field G. Van Zee Date: Thu Nov 21 16:34:27 2019 -0600 Fixed blastest failure for 'generic' subconfig. Details: - Fixed a subtle and complicated bug that only manifested via the BLAS test drivers in the generic subconfiguration, and possibly any other subconfiguration that did not register complex-domain gemm ukernels, or registered ONLY real-domain ukernels as row-preferential. This is a long story, but it boils down to an exception to the "transpose the operation to bring storage of C into agreement with ukernel pref" optimization in bli_hemm_front.c and bli_symm_front.c sabotaging the proper functioning of the 1m method, but only when the imaginary component of beta is zero. See the comments in issue #342 for more details. Thanks to Dave Love for identifying the commit in which this bug was introduced, and other feedback related to this bug. commit 0c7165fb01cdebbc31ec00124d446161b289942f Author: Field G. Van Zee Date: Thu Nov 14 16:48:14 2019 -0600 Fixed obscure bug in bli_acquire_mpart_[mn]dim(). Details: - Fixed a bug in bli_acquire_mpart_mdim(), bli_acquire_mpart_ndim(), and bli_acquire_mpart_mndim() that allowed the use of a blocksize b that is too large given the current row/column index (i.e., the i/j argument) and the size of the dimension being partitioned (i.e., the m/n argument). This bug only affected backwards partitioning/motion through the dimension and was the result of a misplaced conditional check-and-redirect to the backwards code path. It should be noted that this bug was discovered not because it manifested the way it could (thanks to the callers in BLIS making sure to always pass in the "correct" blocksize b), but could have manifested if the functions were used by 3rd party callers. Thanks to Minh Quan Ho for reporting the bug via issue #363. commit fb8bef9982171ee0f60bc39e41a33c4d31fd59a9 Author: Field G. Van Zee Date: Thu Nov 14 13:05:28 2019 -0600 Fixed copy-paste bug in bli_spackm_6xk_bb4_ref(). Details: - Fixed a copy-paste bug in the new bli_spackm_6xk_bb4_ref() that manifested as failures in single-precision real level-3 operations. Also replaced the duplication factor constants with a const-qualifed varialbe, dfac, so that this won't happen again. - Changed NC for single-precision real from 4080 to 8160 so that the packed matrix B will have the same byte footprint in both single and double real. commit 8f399c89403d5824ba767df1426706cf2d19d0a7 Author: Field G. Van Zee Date: Tue Nov 12 15:32:57 2019 -0600 Tweaked/added notes to docs/Multithreading.md. Details: - Added language to docs/Multithreading.md cautioning the reader about the nuances of setting multithreading parameters via the manual and automatic ways simultaneously, and also about how these parameters behave when multithreading is disabled at configure-time. These changes are an attempt to address the issues that arose in issue #362. Thanks to Jérémie du Boisberranger for his feedback on this topic. - CREDITS file update. commit bdc7ee3394500d8e5b626af6ff37c048398bb27e Author: Field G. Van Zee Date: Mon Nov 11 15:47:17 2019 -0600 Various fixes to support packing duplication in B. Details: - Added cpp macros to trmm and trmm3 front-ends to optionally force those operations to be cast so the structured matrix is on the left. symm and hemm already had such macros, but these too were renamed so that the macros were individual to the operation. We now have four such macros: #define BLIS_DISABLE_HEMM_RIGHT #define BLIS_DISABLE_SYMM_RIGHT #define BLIS_DISABLE_TRMM_RIGHT #define BLIS_DISABLE_TRMM3_RIGHT Also, updated the comments in the symm and hemm front-ends related to the first two macro guards, and added corresponding comments to the trmm and trmm3 front-ends for the latter two guards. (They all functionally do the same thing, just for their specific operations.) Thanks to Jeff Hammond for reporting the bugs that led me to this change (via #359). - Updated config/old/haswellbb subconfiguration (used to debug issues related to duplicating B during packing) to register: a packing kernel for single-precision real; gemmbb ukernels for s, c, and z; trsmbb ukernels for s, c, and z; gemmtrsmbb virtual ukrnels for s, c and z; and to use non-default cache and register blocksizes for s, c, and z datatypes. Also declared prototypes for all of the gemmbb, trsmbb, and gemmtrsmbb ukernel functions within the bli_cntx_init_haswellbb() function. This should, once applied to the power9 configuration, fix the remaining issues in #359. - Defined bli_spackm_6xk_bb4_ref(), which packs single reals with a duplication factor of 4. This function is defined in the same file as bli_dpackm_6xk_bb2_ref() (bli_packm_cxk_bb_ref.c). commit 0eb79ca8503bd7b237994335b9687457227d3290 Author: Field G. Van Zee Date: Fri Nov 8 14:48:48 2019 -0600 Avoid unused variable warning in lread.c (#356). Details: - Replaced the line f = f; with ( void )f; for the unused variable 'f' in blastest/f2c/lread.c. (Hopefully) addresses issue #356, but since we don't use xlc who knows. Thanks to Jeff Hammond for reporting this. commit f377bb448512f0b578263387eed7eaf8f2b72bb7 Author: Jérôme Duval Date: Thu Nov 7 23:39:29 2019 +0100 Add Haiku to the known OS list (#361) commit e29b1f9706b6d9ed798b7f6325f275df4e6be973 Author: Field G. Van Zee Date: Tue Nov 5 17:15:19 2019 -0600 Fixed failing testsuite gemmtrsm_ukr for power9. Details: - Added code that fixes false failures in the gemmtrsm_ukr module of the testsuite. The tests were failing because the computation (bli_gemv()) that performs the numerical check was not able to properly travserse the matrix operands bx1 and b11 that are views into the micropanel of B, which has duplicated/broadcast elements under the power9 subconfig. (For example, a micropanel of B with duplication factor of 2 needs to use a column stride of 2; previously, the column stride was being interpreted as 1.) - Defined separate bli_obj_set_row_stride() and bli_obj_set_col_stride() static functions in bli_obj_macro_defs.h. (Previously, only the function bli_obj_set_strides() was defined. Amazing to think that we got this far without these former functions.) - Updated/expounded upon comments. commit 49177a6b9afcccca5b39a21c6fd8e243525e1505 Author: Field G. Van Zee Date: Mon Nov 4 18:09:37 2019 -0600 Fixed latent testsuite ukr module bugs for power9. Details: - Fixed a latent bug in the testsuite ukernel modules (gemm, trsm, and gemmtrsm) that only manifested once we began running with parameters that mimic those of power9. The problem was rooted in the way those modules were creating objects (and thus allocating memory) for the micropanel operands to the microkernel being tested. Since power9 duplicates/broadcasts elements of B in memory, we needed an easy way of asking for more than one storage element per logical element in the matrix. I incorrectly expressed this as: bli_obj_create( datatype, k, n, ldbp, 1, &bp ); The problem here is that bli_obj_create() is exceedingly efficient at calculating the size it passes to malloc() and doesn't allocate a full leading dimension's worth of elements for the last column (or row, in this example). This would normally not bother anyone since you're not supposed to access that memory anyway. But here, my attempted "hack" for getting extra elements was insufficient, and needed to be changed to: bli_obj_create( datatype, k, ldbp, ldbp, 1, &bp ); That is, the extra elements needed to be baked into the dimensions of the matrix object in order to have the intended effect on the number of elements actually allocated. Thanks to Jeff Hammond for reporting this bug. - Fixed a typically harmless memory leak in the aforementioned test modules (the objects for the packed micropanels were not being freed). - Updated/expanded a common comment across all three ukr test modules. commit c84391314d4f1b3f73d868f72105324e649f2a72 Author: Field G. Van Zee Date: Mon Nov 4 13:57:12 2019 -0600 Reverted minor temp/wspace changes from b426f9e. Details: - Added missing license header to bli_pwr9_asm_macros_12x6.h. - Reverted temporary changes to various files in 'test' and 'testsuite' directories. - Moved testsuite/jobscripts into testsuite/old. - Minor whitespace/comment changes across various files. commit 4870260f6b8c06d2cc01b7147d7433ddee213f7f Author: Jeff Hammond Date: Mon Nov 4 11:55:47 2019 -0800 blacklist GCC 5 and older for POWER9 (#360) commit b426f9e04e5499c6f9c752e49c33800bfaadda4c Author: Nicholai Tukanov Date: Fri Nov 1 17:57:03 2019 -0500 POWER9 DGEMM (#355) Implemented and registered power9 dgemm ukernel. Details: - Implemented 12x6 dgemm microkernel for power9. This microkernel assumes that elements of B have been duplicated/broadcast during the packing step. The microkernel uses a column orientation for its microtile vector registers and thus implements column storage and general stride IO cases. (A row storage IO case via in-register transposition may be added at a future date.) It should be noted that we recommend using this microkernel with gcc and *not* xlc, as issues with the latter cropped up during development, including but not limited to slightly incompatible vector register mnemonics in the GNU extended inline assembly clobber list. commit 58102aeaa282dc79554ed045e1b17a6eda292e15 Merge: 52059506 b9bc222b Author: Field G. Van Zee Date: Mon Oct 28 17:58:31 2019 -0500 Merge branch 'amd' commit 52059506b2d5fd4c3738165195abeb356a134bd4 Author: Field G. Van Zee Date: Wed Oct 23 15:26:42 2019 -0500 Added "How to Download BLIS" section to README.md. Details: - Added a new section to the README.md, just prior to the "Getting Started" section, titled "How to Download BLIS". This section details the user's options for obtaining BLIS and lays out four common ways of downloading the library. Thanks to Jeff Diamond for his feedback on this topic. commit e6f0a96cc59aef728470f6850947ba856148c38a Author: Field G. Van Zee Date: Mon Oct 14 17:05:39 2019 -0500 Updated README.md to ack Facebook as funder. commit b9bc222bfc3db4f9ae5d7b3321346eed70c2c3fb Author: Field G. Van Zee Date: Mon Oct 14 16:38:15 2019 -0500 Call bli_syrk_small() before error checking. Details: - In bli_syrk_front(), moved the conditional call to bli_syrk_check() (if error checking is enabled) and the conditional scaling of C by beta (if alpha is zero) so that they occur after, instead of before, the call to bli_syrk_small(). This sequencing now matches that of bli_gemm_small() in bli_gemm_front() and bli_trsm_small() in bli_trsm_front(). commit f0959a81dbcf30d8a1076d0a6348a9835079d31a Author: Field G. Van Zee Date: Mon Oct 14 15:46:28 2019 -0500 When manual config is blacklisted, output error. Details: - Fixed and adjusted the logic in configure so that a more informative error message is output when a user runs './configure ... ' and is present in the configuration blacklist. Previously, this particular set of conditions would result in the message: 'user-specified configuration '' is NOT registered! That is, the error message mis-identified the targeted configuration as the empty string, and (more importantly) mis-identifies the problem. Thanks to Tze Meng Low for reporting this issue. - Fixed a nearby error messages somewhat unrelated to the issue above. Specifically, the wrong string was being printed when the error message was identifying an auto-detected configuration that did not appear to be registered. commit 6218ac95a525eefa8921baf8d0d7057dfacebe9c Merge: 0016d541 a617301f Author: Field G. Van Zee Date: Fri Oct 11 11:53:51 2019 -0500 Merge branch 'master' into amd commit 0016d541e6b0da617b1fae6612d2b314901b7a75 Author: Field G. Van Zee Date: Fri Oct 11 11:09:44 2019 -0500 Changed -march=znver2 to =znver1 for clang on zen2. Details: - In config/zen2/make_defs.mk, changed the -march= flag so that -march=znver1 is used instead of -march=znver2 when CC_VENDOR is clang. (The gcc branch attempts to differentiate between various versions, but the equivalent version cutoffs for clang are not yet known by us, so we have to use a single flag for all versions of clang. Hopefully -march=znver1 is new enough. If not, we'll fall back to -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp.) This issue was discovered thanks to AppVeyor. commit e94a0530e5ac4c78a18f09105f40003be2b517f7 Author: Field G. Van Zee Date: Fri Oct 11 10:48:27 2019 -0500 Corrected zen NC that was non-multiple of NR. Details: - Updated an incorrectly set cache blocksize NC for single real within config/zen/bli_cntx_init_zen.c that was non a multiple of the corresponding value of NR. This issue, which was caught by Travis CI, was introduced in 29b0e1e. commit a2ffac752076bf55eb8c1fe2c5da8d9104f1f85b Merge: 1cfe8e25 29b0e1ef Author: Field G. Van Zee Date: Fri Oct 11 10:31:18 2019 -0500 Merge branch 'amd-master' into amd commit 29b0e1ef4e8b84ce76888d73c090009b361f1306 Merge: 1cfe8e25 fdce1a56 Author: Field G. Van Zee Date: Fri Oct 11 10:24:24 2019 -0500 Code review + tweaks to AMD's AOCL 2.0 PR (#349). Details: - NOTE: This is a merge commit of 'master' of git://github.com/amd/blis into 'amd-master' of flame/blis. - Fixed a bug in the downstream value of BLIS_NUM_ARCHS, which was inadvertantly not incremented when the Zen2 subconfiguration was added. - In bli_gemm_front(), added a missing conditional constraint around the call to bli_gemm_small() that ensures that the computation precision of C matches the storage precision of C. - In bli_syrk_front(), reorganized and relocated the notrans/trans logic that existed around the call to bli_syrk_small() into bli_syrk_small() to minimize the calling code footprint and also to bring that code into stylistic harmony with similar code in bli_gemm_front() and bli_trsm_front(). Also, replaced direct accessing of obj_t fields with proper accessor static functions (e.g. 'a->dim[0]' becomes 'bli_obj_length( a )'). - Added #ifdef BLIS_ENABLE_SMALL_MATRIX guard around prototypes for bli_gemm_small(), bli_syrk_small(), and bli_trsm_small(). This is strictly speaking unnecessary, but it serves as a useful visual cue to those who may be reading the files. - Removed cpp macro-protected small matrix debugging code from bli_trsm_front.c. - Added a GCC_OT_9_1_0 variable to build/config.mk.in to facilitate gcc version check for availability of -march=znver2, and added appropriate support to configure script. - Cleanups to compiler flags common to recent AMD microarchitectures in config/zen/amd_config.mk, including: removal of -march=znver1 et al. from CKVECFLAGS (since the -march flag is added within make_defs.mk); setting CRVECFLAGS similarly to CKVECFLAGS. - Cleanups to config/zen/bli_cntx_init_zen.c. - Cleanups, added comments to config/zen/make_defs.mk. - Cleanups to config/zen2/make_defs.mk, including making use of newly- added GCC_OT_9_1_0 and existing GCC_OT_6_1_0 to choose the correct set of compiler flags based on the version of gcc being used. - Reverted downstream changes to test/test_gemm.c. - Various whitespace/comment changes. commit a617301f9365ac720ff286514105d1b78951368b Author: Field G. Van Zee Date: Tue Oct 8 17:14:05 2019 -0500 Updates to docs/CodingConventions.md. commit 171f10069199f0cd280f18aac184546bd877c4fe Merge: 702486b1 05d58edf Author: Field G. Van Zee Date: Fri Oct 4 11:18:23 2019 -0500 Merge remote-tracking branch 'loveshack/emacs' commit 702486b12560b5c696ba06de9a73fc0d5107ca44 Author: Field G. Van Zee Date: Wed Oct 2 16:35:41 2019 -0500 Removed stray FAQ section introduced in 1907000. commit 1907000ad6ea396970c010f07ae42980b7b14fa0 Author: Field G. Van Zee Date: Wed Oct 2 16:31:54 2019 -0500 Updated to FAQ (AMD-related questions). Details: - Added a couple potential frequently-asked questions/answers releated to AMD's fork of BLIS. - Updated existing answers to other questions. commit 834f30a0dad808931c9d80bd5831b636ed0e1098 Author: Field G. Van Zee Date: Wed Oct 2 12:45:56 2019 -0500 Mention mixeddt paper in docs/MixedDatatypes.md. commit 05d58edfe0ea9279971d74f17a5f7a69c4672ed5 Author: Dave Love Date: Wed Oct 2 10:33:44 2019 +0100 Note .dir-locals.el in docs commit 531110c339f199a4d165d707c988d89ab4f5bfe8 Author: Dave Love Date: Wed Oct 2 10:16:22 2019 +0100 Modify Emacs config Confine it to cc-mode and add comment-start/end. commit 4bab365cab98202259c70feba6ec87408cba28d8 Author: Dave Love Date: Tue Oct 1 19:22:47 2019 +0000 Add .dir-locals.el for Emacs (#348) A minimal version that could probably do with extending, but at least gets the indentation roughly right. commit 4ec8dad66b3d37b0a2b47d19b7144bb62d332622 Author: Dave Love Date: Thu Sep 26 16:27:53 2019 +0100 Add .dir-locals.el for Emacs A minimal version that could probably do with extending, but at least gets the indentation roughly right. commit bc16ec7d1e2a30ce4a751255b70c9cbe87409e4f Author: Field G. Van Zee Date: Mon Sep 23 15:37:33 2019 -0500 Set execute bits of shared library at install-time. Details: - Modified the 0644 octal code used during installation of shared libraries to 0755 (for Linux/OSX only). Thanks to Adam J. Stewart for reporting this issue via #343. - CREDITS file update. commit c60db26aee9e7b4e5d0b031b0881e58d23666b53 Author: Field G. Van Zee Date: Tue Sep 17 18:04:17 2019 -0500 Fixed bad loop counter in bli_[cz]scal2bbs_mxn(). Details: - Fixed a typo in the loop counter for the 'd' (duplication) dimension in the complex macros of frame/include/level0/bb/bli_scal2bbs_mxn.h. They shouldn't be used by anyone yet, but thankfully clang via AppVeyor spit out warnings that alerted me to the issue. commit c766c81d628f0451d8255bf5e4b8be0a4ef91978 Author: Field G. Van Zee Date: Tue Sep 17 18:00:29 2019 -0500 Added missing schema arg to knl packm kernels. Details: - Added the pack_t schema argument to the knl packm kernel functions. This change was intended for inclusion in 31c8657. (Thank you SDE + Travis CI.) commit 31c8657f1d6d8f6efd8a73fd1995e995fc56748b Author: Field G. Van Zee Date: Tue Sep 17 17:42:10 2019 -0500 Added support for pre-broadcast when packing B. Details: - Added support for being able to duplicate (broadcast) elements in memory when packing matrix B (ie: the left-hand operand) in level-3 operations. This turns out advantageous for some architectures that can afford the cost of the extra bandwidth and somehow benefit from the pre-broadcast elements (and thus being able to avoid using broadcast-style load instructions on micro-rows of B in the gemm microkernel). - Support optionally disabling right-side hemm and symm. If this occurs, hemm_r is implemented in terms of hemm_l (and symm_r in terms of symm_l). This is needed when broadcasting during packing because the alternative--supporting the broadcast of B while also allowing matrix B to be Hermitian/symmetric--would be an absolute mess. - Support alignment factors for packed blocks of A, B, and C separately (as well as for general-purpose buffers). In addition, we support byte offsets from those alignment values (which is different from aligning by align+offset bytes to begin with). The default alignment values are BLIS_PAGE_SIZE in all four cases, with the offset values defaulting to zero. - Pass pack_t schema into bli_?packm_cxk() so that it can be then passed into the packm kernel, where it will be needed by packm kernels that perform broadcasts of B, since the idea is that we *only* want to broadcast when packing micropanels of B and not A. - Added definition for variadic bli_cntx_set_l3_vir_ukrs(), which can be used to set custom virtual level-3 microkernels in the cntx_t, which would typically be done in the bli_cntx_init_*() function defined in the subconfiguration of interest. - Added a "broadcast B" kernel function for use with NP/NR = 12/6, defined in in ref_kernels/1m/bli_packm_cxk_bb_ref.c. - Added a gemm, gemmtrsm, and trsm "broadcast B" reference kernels defined in ref_kernels/3/bb. (These kernels have been tested with double real with NP/NR = 12/6.) - Added #ifndef ... #endif guards around several macro constants defined in frame/include/bli_kernel_macro_defs.h. - Defined a few "broadcast B" static functions in frame/include/level0/bb for use by "broadcast B"-style packm reference kernels. For now, only the real domain kernels are tested and fully defined. - Output the alignment and offset values for packed blocks of A and B in the testsuite's "BLIS configuration info" section. - Comment updates to various files. - Bumped so_version to 3.0.0. commit fd9bf497cd4ff73ccdfc030ba037b3cb2f1c2fad Author: Field G. Van Zee Date: Tue Sep 17 15:45:24 2019 -0500 CREDITS file update. commit 6c8f2d1486ce31ad3c2083e5c2035acfd4409a43 Author: ShmuelLevine Date: Tue Sep 17 16:43:46 2019 -0400 Fix description for function bli_*pxby2v (#340) Fix typo in BLISTypedAPI.md for bli_?axpy2v() description. commit b5679c1520f8ae7637b3cc2313133461f62398dc Author: Field G. Van Zee Date: Tue Sep 17 14:00:37 2019 -0500 Inserted Multithreading links into BuildSystem.md. Details: - Inserted brief disclaimers about default disabled multithreading and default single-threadedness to BuildSystem.md along with links to the Multithreading.md document. Thanks to Jeff Diamond for suggesting these additions. - Trivial reword of sentence regarding automatically-detected architectures. commit f4f5170f8482c94132832eb3033bc8796da5420b Author: Isuru Fernando Date: Wed Sep 11 07:34:48 2019 -0500 Update README.md (#338) commit 1cfe8e2562e5e50769468382626ce36b734741c1 Author: Field G. Van Zee Date: Thu Sep 5 16:08:30 2019 -0500 Reimplemented bli_cpuid_query() for ARM. Details: - Rewrote bli_cpuid_query() for ARM architectures to use stdio-based functions such as fopen() and fgets() instead of popen(). The new code does more or less the same thing as before--searches /proc/cpuinfo for various strings, which are then parsed in order to determine the model, part number, and features. Thanks to Dave Love for suggesting this change in issue #335. commit 7c7819145740e96929466a248d6375d40e397e19 Author: Devin Matthews Date: Fri Aug 30 16:52:09 2019 -0500 Always use sqsumv to compute normfv. (#334) * Always use sqsumv to compute normfv on MacOS. * Unconditionally disable the "dot trick" in normfv. * Added explanatory comment to normfv definition. Details: - Added a comment above the unconditional disabling of the dotv-based implementation to normfv. Thanks to Roman Yurchak, Devin Matthews, and Isuru Fernando in helping with this improvement. - CREDITS file update. commit 80e6c10b72d50863b4b64d79f784df7befedfcd1 Author: Field G. Van Zee Date: Thu Aug 29 12:12:08 2019 -0500 Added reproduction section to Performance docs. Details: - Added section titled "Reproduction" to both Performance.md and PerformanceSmall.md that briefly nudges the motivated reader in the right direction if he/she wishes to run the same performance benchmarks used to produce the graphs shown in those documents. Thanks to Dave Love for making this suggestion. commit 14cb426414856024b9ae0f84ac21efcc1d329467 Author: Field G. Van Zee Date: Wed Aug 28 17:04:33 2019 -0500 Updated OpenBLAS, Eigen sup results. Details: - Updated the results shown in docs/PerformanceSmall.md for OpenBLAS and Eigen. commit b02e0aae8ce2705e91023b98ed416cd05430a78e Author: Field G. Van Zee Date: Tue Aug 27 14:37:46 2019 -0500 Updated test drivers to iterate backwards. Details: - Updated test driver source in test, test/3, test/1m4m, and test/mixeddt to iterate through the problem space backwards. This can help avoid certain situations where the CPU frequency does not immediately throttle up to its maximum. Thanks to Robert van de Geijn for recommending this fix (originally made to test/sup drivers in 57e422a). - Applied off-by-one matlab output bugfix from b6017e5 to test drivers in test, test/3, test/1m4m, and test/mixeddt directories. commit b6017e53f4b26c99b14cdaa408351f11322b1e80 Author: Field G. Van Zee Date: Tue Aug 27 14:18:14 2019 -0500 Bugfix of output text + tweaks to test/sup driver. Details: - Fixed an off-by-one bug in the output of matlab row indices in test/sup/test_gemm.c that only manifested when the problem size increment was equal to 1. - Disabled the building of rrc, rcr, rcc, crr, crc, and ccr storage combinations for blissup drivers in test/sup. This helps make the building of drivers complete sooner. - Trivial changes to test/sup/runme.sh. commit 138d403b6bb15e687a3fe26d3d967b8ccd1ed97b Author: Devin Matthews Date: Mon Aug 26 18:11:27 2019 -0500 Use -funsafe-math-optimizations and -ffp-contract=fast for all reference kernels when using gcc or clang. (#331) commit d5a05a15a7fcc38fb2519031dcc62de8ea4a530c Author: Field G. Van Zee Date: Mon Aug 26 16:54:31 2019 -0500 Cropped whitespace from new sup graphs. Details: - Previously forgot crop whitespace from the new .png graphs added/updated in docs/graphs/sup. commit a6c80171a353db709e43f9e6e7a3da87ce4d17ed Author: Field G. Van Zee Date: Mon Aug 26 16:51:31 2019 -0500 Fixed contents links in docs/PerformanceSmall.md. Details: - Corrected links in contents section of docs/PerformanceSmall.md, which were erroneously directing readers to the corresponding sections of docs/Performance.md. commit 40781774df56a912144ef19cc191ed626a89f0de Author: Field G. Van Zee Date: Mon Aug 26 16:47:37 2019 -0500 Updated sup performance graphs with libxsmm. Details: - Added libxsmm to column-stored sup graphs presented in docs/PerformanceSmall.md. - Updated sup results for BLASFEO. - Added sup results for Lonestar5 (Haswell). - Addresses issue #326. commit bfddf671328e7e372ac7228f72ff2d9d8e03ae18 Author: figual Date: Mon Aug 26 12:01:33 2019 +0200 Fixed context registration for Cortex A53 (#329). commit 4a0a6e89c568246d14de4cc30e3ff35aac23d774 Author: Field G. Van Zee Date: Sat Aug 24 15:25:16 2019 -0500 Changed test/sup alpha to 1; test libxsmm+netlib. Details: - Changed the value of alpha to 1.0 in test/sup/test_gemm.c. This is needed because libxsmm currently only optimizes gemm operations where alpha is unit (and beta is unit or zero). - Adjusted the test/sup/Makefile to test libxsmm with netlib BLAS as its fallback library. This is the library that will be called the problem dimensions are deemed too large, or any other criteria for optimization are not met. (This was done not because it is realistic, but rather so that it would be very clear when libxsmm ceased handling gemm calls internally when the data are graphed.) commit 7aa52b57832176c5c13a48e30a282e09ecdabf73 Author: Field G. Van Zee Date: Fri Aug 23 16:12:50 2019 -0500 Use libxsmm API in test/sup; add missing -ldl. Details: - Switch the driver source in test/sup so that libxsmm_?gemm() is called instead of ?gemm_() when compiling for / linking against libxsmm. libxsmm's documentation isn't clear on whether it is even *trying* to provide BLAS API compatibility, and I got tired of trying to figure it out. - Added missing -ldl in LDFLAGS when linking against libxsmm. commit 57e422aa168bee7416965265c93fcd4934cd7041 Author: Field G. Van Zee Date: Fri Aug 23 14:17:52 2019 -0500 Added libxsmm support to test/sup drivers. Details: - Modified test/sup/Makefile to build drivers that test the performance of skinny/small problems via libxsmm. - Modified test/sup/runme.sh to run aforementioned drivers. - Modified test/sup/test_gemm.c so that problem sizes are tested in reverse order (from largest to smallest). This can help avoid certain situations where the CPU frequency does not immediately throttle up to its maximum. Thanks to Robert van de Geijn for recommending this fix. commit 661681fe33978acce370255815c76348f83632bc Merge: 2f387e32 ef0a1a0f Author: Field G. Van Zee Date: Thu Aug 22 14:29:50 2019 -0500 Merge branch 'master' of github.com:flame/blis commit 2f387e32ef5f9a17bafb5076dc9f66c38b52b32d Author: Field G. Van Zee Date: Thu Aug 22 14:27:30 2019 -0500 Added Eigen -march=native hack to perf docs. Details: - Spell out the hack given to me by Sameer Agarwal in order to get Eigen to build with -march=native (which is critically important for Eigen) in docs/Performance.md and docs/PerformanceSmall.md. commit ef0a1a0faf683fe205f85308a54a77ffd68a9a6c Author: Devin Matthews Date: Wed Aug 21 17:40:24 2019 -0500 Update do_sde.sh (#330) * Update do_sde.sh Automatically accept SDE license and download directly from Intel * Update .travis.yml [ci skip] * Update .travis.yml Enable SDE testing for PRs. commit 0cd383d53a8c4a6871892a0395591ef5630d4ac0 Author: Field G. Van Zee Date: Wed Aug 21 13:39:05 2019 -0500 Corrected variable type and comment update. Details: - Forgot to save all changes from bli_gemmtrsm4m1_ref.c before commit in 8122f59. Fixed type mismatch and referenced github issue in comment. commit 8122f59745db780987da6aa1e851e9e76aa985e0 Author: Field G. Van Zee Date: Wed Aug 21 13:22:12 2019 -0500 Pacify 'restrict' warning in gemmtrsm4m1 ref ukr. Details: - Previously, some versions of gcc would complain that the same pointer, one_r, is being passed in for both alpha and beta in the fourth call to the real gemm ukernel in bli_gemmtrsm4m1_ref.c. This is understandable since the compiler knows that the real gemm ukernel qualifies all of its floating-point arguments (including alpha and beta) with restrict. A small hack has been inserted into the file that defines a new variable to store the value 1.0, which is now used in lieu of one_r for beta in the fourth call to the real gemm ukernel, which should pacify the compiler now. Thanks to Dave Love for reporting this issue (#328) and for Devin Matthews for offering his 'restrict' expertise. commit e8c6281f139bdfc9bd68c3b36e5e89059b0ead2e Author: Field G. Van Zee Date: Wed Aug 21 12:38:53 2019 -0500 Add -march support for specific gcc version ranges. Details: - Added logic to configure that checks the version of the compiler against known version ranges that could cause problems later in the build process. For example, versions of gcc older than 4.9.0 use different -march labels than version 4.9.0 or later ('-march=corei7-avx' vs '-march=sandybridge', respectively). Similarly, before 6.1, compilation on Zen was possible, but you need to start with -march=bdver4 and then disable instruction sets that were discarded during the transition from Excavator to Zen. So now, configure substitutes 'yes'/'no' values into anchors in config.mk.in, which sets various make variables (e.g. GCC_OT_4_9_0), which can be accessed and branched upon by the various configurations' make_defs.mk files when setting their compiler flags. - Updated config/haswell/make_defs.mk to branch on GCC_OT_4_9_0. - Updated config/sandybridge/make_defs.mk to branch on GCC_OT_4_9_0. - Updated config/zen/make_defs.mk to branch on GCC_OT_6_1_0. commit e6ac4ebcb6e6a372820e7f509c0af3342966b84a Author: Field G. Van Zee Date: Tue Aug 20 13:49:47 2019 -0500 Added page size, source location to perf docs. Details: - Added the page size, as returned via 'getconf -a | grep PAGE_SIZE', and the location of the performance drivers to docs/Performance.md (test/3) and docs/PerformanceSmall.md (test/sup). Thanks to Dave Love for suggesting these additions in #325. commit fdce1a5648d69034fab39943100289323011c36f Author: Meghana Date: Wed Jul 24 15:04:41 2019 +0530 changed gcc version check condition from 'ifeq' to 'if greater or equal' Change-Id: Ie4c461867829bcc113210791bbefb9517e52c226 commit c9486e0c4f82cd9f58f5ceb71c0df039e9970a20 Author: Meghana Date: Wed Jul 24 09:45:17 2019 +0530 code to detect version of gcc and set flags accordingly for zen2 Change-Id: I29b0311d0000dee1a2533ee29941acf53f9e9f34 commit 54afe3dfe6828a1aff65baabbf14c98d92e50692 Author: Field G. Van Zee Date: Tue Jul 23 16:54:28 2019 -0500 Added "Education and Learning" ToC entry to README. commit 9f53b1ce7ac702e84e71801fe96986f6aa16040e Author: Field G. Van Zee Date: Tue Jul 23 16:50:35 2019 -0500 Added "Education and Learning" section to README. Details: - Added a short section after the Intro of the README.md file titled "Education and Learning" that directs interested readers to the "LAFF-On Programming for High-Performance" massive open online course (MOOC) hosted via edX. commit deda4ca8a094ee18d7c7c45e040e8ef180f33a48 Author: Field G. Van Zee Date: Mon Jul 22 13:59:05 2019 -0500 Added test/1m4m driver directory. Details: - Added a new standalone test driver directory named '1m4m' that can build and run performance experiments for BLIS 1m, 4m1a, assembly, OpenBLAS, and the vendor library (MKL). This new driver directory was used to regenerate performance results for the 1m paper. - Added alternate (commented-out) cache blocksizes to config/haswell/bli_cntx_init_haswell.c. These blocksizes tend to work well on an a 12-core Intel Xeon E5-2650 v3. commit dcc0ce12fde4c6dca2b4764a1922a2ab19725867 Author: Meghana Date: Mon Jul 22 17:12:01 2019 +0530 Added a global Makefile for AMD architectures in config/zen folder This Makefile(amd_config.mk) has all the flags that are common to EPYC series Change-Id: Ic02c60a8293ccdd37f0f292e631acd198e6895de commit af17bca26a8bd3dcbee8ca81c18d7b25de09c483 Author: Field G. Van Zee Date: Fri Jul 19 14:46:23 2019 -0500 Updated haswell MC cache blocksizes. Details: - Updated the default MC cache blocksizes used by the haswell subconfig for both row-preferential (the default) and column-preferential microkernels. commit b5e9bce4dde5bf014dd9771ae741048e1f6c7748 Author: Field G. Van Zee Date: Fri Jul 19 14:42:37 2019 -0500 Updated -march flags for sandybridge, haswell. Details: - Updated the '-march=corei7-avx' flag in the sandybridge subconfig to '-march=sandybridge' and the '-march=core-avx2' flag in the haswell subconfig to '-march=haswell'. The older flags were used by older versions of gcc and should have been updated to the newer forms a long time ago. (The older flags were clearly working, even though they are no longer documented in the gcc man page.) commit c22b9dba5859a9fc94c8431eccc9e4eb9be02be1 Author: Field G. Van Zee Date: Tue Jul 16 13:14:47 2019 -0500 More updates to comments in testsuite modules. Details: - Updated most comments in testsuite modules that describe how the correctness test is performed so that it is clear whether the vector (normfv) or matrix (normfm) form of Frobenius norm is used. commit c4cc6fa702f444a05963db01db51bc7d6669e979 Author: Field G. Van Zee Date: Tue Jul 16 13:00:35 2019 -0500 New cntx_t blksz "set" functions + misc tweaks. Details: - Defined two new static functions in bli_cntx.h: bli_cntx_set_blksz_def_dt() bli_cntx_set_blksz_max_dt() which developers may find convenient when experimenting with different values of cache blocksizes. - Updated one- and two-socket multithreaded problem size range and increment values in test/3/Makefile. - Changed default to column storage in test/3/test_gemm.c. - Fixed typo in comment in testsuite/src/test_subm.c. commit b84cee29f42855dc1f263e42b83b1a46ac8def87 Merge: 1f80858a c7dd6e6c Author: Meghana Vankadari Date: Mon Jul 8 02:03:07 2019 -0400 Merge "Added compiler flags for vanilla clang" into amd-staging-rome2.0 commit 1f80858abf5ca220b2998fbe6f9b06c32d3864c3 Author: kdevraje Date: Fri Jul 5 16:05:11 2019 +0530 This checkin solves the dgemm performance issue jira ticket CPUPL 458, as #else was missed during integration, it was always following else path to get the block sizes Change-Id: I0084b5856c2513ab1066c08c15b5086db6532717 commit c7dd6e6cd2f910cbefcdc1e04a5adeb919a23de0 Author: Meghana Date: Thu Jul 4 09:32:51 2019 +0530 Added compiler flags for vanilla clang Change-Id: I13c00b4c0d65bbda4c929848fd48b0ab611952ab commit 2acd49b76457635625a01e31c2abc8902b23cf51 Author: Meghana Date: Mon Jul 1 15:42:38 2019 +0530 fix for test failures using AOCC 2.0 Change-Id: If44eaccc64bbe96bbbe1d32279b1b5773aba08d1 commit ceee2f973ebe115beca55ca77f9e3ce36b14c28a Author: Field G. Van Zee Date: Mon Jun 24 17:47:40 2019 -0500 Fixed thrinfo_t printing bug for small problems. Details: - Fixed a bug in bli_l3_thrinfo_print_gemm_paths() and bli_l3_thrinfo_print_trsm_paths(), defined in bli_l3_thrinfo.c, whereby subnodes of the thrinfo_t tree are "dereferenced" near the beginning of the functions, which may lead to segfaults in certain situations where the thread tree was not fully formed because the matrix problem was too small for the level of parallelism specified. (That is, too small because some problems were assigned no work due to the smallest units in the m and n dimensions being defined by the register blocksizes mr and nr.) The fix requires several nested levels of if statements, and this is one of those few instances where use of goto statements results in (mostly) prettier code, especially in the case of _gemm_paths(). And while it wasn't necessary, I ported this goto usage to the loop body that prints the thrinfo_t work_id and comm_id values for each thread. Thanks to Nicholai Tukanov for helping to find this bug. commit cac127182dd88ed0394ad81e6b91b897198e168a Merge: 565fa385 3a45ecb1 Author: kdevraje Date: Mon Jun 24 13:01:27 2019 +0530 Merge branch 'amd-staging-rome2.0' of ssh://git.amd.com:29418/cpulibraries/er/blis with public repo commit id 565fa3853b381051ac92cff764625909d105644d. Change-Id: I68b9824b110cf14df248217a24a6191b3df79d42 commit c152109e9a3b1cd74760e8a3215a676d25c18d2e Author: Field G. Van Zee Date: Wed Jun 19 13:23:24 2019 -0500 Updated BLASFEO results in PerformanceSmall.md. Details: - Updated the BLASFEO performance graphs shown in PerformanceSmall.md using a new commit of BLASFEO (2c9f312); updated PerformanceSmall.md accordingly. - Updated test/sup/octave/plot_l3sup_perf.m so that the .m files containing the mpnpkp results do not need to be preprocessed in order to plot half the problem size range (ie: up to 400 instead of the 800 range of the other shape cases). - Trivial updates to runme.m. commit 4d19c98110691d33ecef09d7e1b97bd1ccf4c420 Author: Field G. Van Zee Date: Sat Jun 8 11:02:03 2019 -0500 Trivial change to MixedDatatypes.md link text. commit 24965beabe83e19acf62008366097a7f198d4841 Author: Field G. Van Zee Date: Sat Jun 8 11:00:22 2019 -0500 Fixed typo in README.md's MixedDatatypes.md link. commit 50dc5d95760f41c5117c46f754245edc642b2179 Author: Field G. Van Zee Date: Fri Jun 7 13:10:16 2019 -0500 Adjust -fopenmp-simd for icc's preferred syntax. Details: - Use -qopenmp-simd instead of -fopenmp-simd when compiling with Intel icc. Recall that this option is used for SIMD auto-vectorization in reference kernels only. Support for the -f option has been completely deprecated and removed in newer versions of icc in favor of -q. Thanks to Victor Eijkhout for reporting this issue and suggesting the fix. commit ad937db9507786874c801b41a4992aef42d924a1 Author: Field G. Van Zee Date: Fri Jun 7 11:34:08 2019 -0500 Added missing #include "bli_family_thunderx2.h". Details: - Added a cpp-conditional directive block to bli_arch_config.h that #includes "bli_family_thunderx2.h". The code has been missing since adf5c17f. However, this never manifested as an error because the file is virtually empty and not needed for thunderx2 (or most subconfigs). Thanks to Jeff Diamond for helping to spot this. commit ce671917b2bc24895289247feef46f6fdd5020e7 Author: Field G. Van Zee Date: Thu Jun 6 14:17:21 2019 -0500 Fixed formatting/typo in docs/PerformanceSmall.md. commit 86c33a4eb284e2cf3282a1809be377785cdb3703 Author: Field G. Van Zee Date: Wed Jun 5 11:43:55 2019 -0500 Tweaked language in README.md related to sup/AMD. commit cbaa22e1ca368d36a8510f2b4ecd6f1523d1e1f3 Author: Field G. Van Zee Date: Tue Jun 4 16:06:58 2019 -0500 Added BLASFEO results to docs/PerformanceSmall.md. Details: - Updated the graphs linked in PerformanceSmall.md with BLASFEO results, and added documenting language accordingly. - Updated scripts in test/sup/octave to plot BLASFEO data. - Minor tweak to language re: how OpenBLAS was configured for docs/Performance.md. commit 763fa39c3088c0e2c0155675a3ca868a58bffb30 Author: Field G. Van Zee Date: Tue Jun 4 14:46:45 2019 -0500 Minor tweaks to test/sup. Details: - Changed starting problem and increment from 16 to 4. - Added 'lll' (square problems) to list of problem size shapes to compile and run with. - Define BLASFEO location and added BLASFEO-related definitions. commit 5e1e696003c9151b1879b910a1957b7bdd7b0deb Author: Field G. Van Zee Date: Mon Jun 3 18:37:20 2019 -0500 CHANGELOG update (0.6.0) commit 18c876b989fd0dcaa27becd14e4f16bdac7e89b3 Author: Field G. Van Zee Date: Mon Jun 3 18:37:19 2019 -0500 Version file update (0.6.0) commit 0f1b3bf49eb593ca7bb08b68a7209f7cd550f912 Author: Field G. Van Zee Date: Mon Jun 3 18:35:19 2019 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. - CREDITS file update. commit 27da2e8400d900855da0d834b5417d7e83f21de1 Author: Field G. Van Zee Date: Mon Jun 3 17:14:56 2019 -0500 Minor edits to docs/PerformanceSmall.md. Details: - Added performance analysis to "Comments" section of both Kaby Lake and Epyc sections. - Added emphasis to certain passages. commit 09ba05c6f87efbaadf085497dc137845f16ee9c5 Author: Field G. Van Zee Date: Mon Jun 3 16:53:19 2019 -0500 Added sup performance graphs/document to 'docs'. Details: - Added a new markdown document, docs/PerformanceSmall.md, which publishes new performance graphs for Kaby Lake and Epyc showcasing the new BLIS sup (small/skinny/unpacked) framework logic and kernels. For now, only single-threaded dgemm performance is shown. - Reorganized graphs in docs/graphs into docs/graphs/large, with new graphs being placed in docs/graphs/sup. - Updates to scripts in test/sup/octave, mostly to allow decent output in both GNU octave and Matlab. - Updated README.md to mention and refer to the new PerformanceSmall.md document. commit 6bf449cc6941734748034de0e9af22b75f1d6ba1 Merge: abd8a9fa a4e8801d Author: Field G. Van Zee Date: Fri May 31 17:42:40 2019 -0500 Merge branch 'amd' commit a4e8801d08d81fa42ebea6a05a990de8dcedc803 Author: Field G. Van Zee Date: Fri May 31 17:30:51 2019 -0500 Increased MT sup threshold for double to 201. Details: - Fine-tuned the double-precision real MT threshold (which controls whether the sup implementation kicks for smaller m dimension values) from 180 to 201 for haswell and 180 to 256 for zen. - Updated octave scripts in test/sup/octave to include a seventh column to display performance for m = n = k. commit 3a45ecb15456249c30ccccd60e42152f355615c1 Merge: 3f867c96 b69fb0b7 Author: Kiran Devrajegowda Date: Fri May 31 06:47:02 2019 -0400 Merge "Added back BLIS_ENABLE_ZEN_BLOCK_SIZES macro to zen configuration, this is same as release 1.3. This was added before to improve DGEMM Multithreaded scalability on Naples for when number of threads is greater than 16. By mistake this got deleted in many changes done for 2.0 release, now we are adding this change back., in bli_gemm_front.c - code cleanup" into amd-staging-rome2.0 commit b69fb0b74a4756168de270fc9b18f7cf7aa57f17 Author: Kiran Varaganti Date: Fri May 31 15:14:22 2019 +0530 Added back BLIS_ENABLE_ZEN_BLOCK_SIZES macro to zen configuration, this is same as release 1.3. This was added before to improve DGEMM Multithreaded scalability on Naples for when number of threads is greater than 16. By mistake this got deleted in many changes done for 2.0 release, now we are adding this change back., in bli_gemm_front.c - code cleanup Change-Id: I9f5d8225254676a99c6f2b09a0825e545206d0fc commit 3f867c96caea3bbbbeeff1995d90f6cf8c9895fb Author: kdevraje Date: Fri May 31 12:22:44 2019 +0530 When running HPL with pure MPI without DGEMM Threading (Single Threaded BLIS ), making this macro 1 gives best performance.wq Change-Id: I24fd0bf99216f315e49f1c74c44c3feaffd7078d commit abd8a9fa7df4569aa2711964c19888b8e248901f Author: Field G. Van Zee Date: Tue May 28 12:49:44 2019 -0500 Inadvertantly hidden xerbla_() in blastest (#313). Details: - Attempted a fix to issue #313, which reports that when building only a shared library (ie: static library build is disabled), running the BLAS test drivers can fail because those drivers provide their own local version of xerbla_() as a clever (albeit still rather hackish) way of checking the error codes that result from the individual tests. This local xerbla_() function is never found at link-time because the BLAS test drivers' Makefile imports BLIS compilation flags via the get-user-cflags-for() function, which currently conveys the -fvisibility=hidden flag, which hides symbols unless they are explicitly annotated for export. The -fvisibility=hidden flag was only ever intended for use when building BLIS (not for applications), and so the attempted solution here is to omit the symbol export flag(s) from get-user-cflags-for() by storing the symbol export flag(s) to a new BULID_SYMFLAGS variable instead of appending it to the subconfigurations' CMISCFLAGS variable (which is returned by every get-*-cflags-for() function). Thanks to M. Zhou for reporting this issue and also to Isuru Fernando for suggesting the fix. - Renamed BUILD_FLAGS to BUILD_CPPFLAGS to harmonize with the newly created BUILD_SYMFLAGS. - Fixed typo in entry for --export-shared flag in 'configure --help' text. commit 13806ba3b01ca0dd341f4720fb930f97e46710b0 Author: kdevraje Date: Mon May 27 16:24:43 2019 +0530 This check in has changes w.r.t Copyright information, which is changed to (start year) - 2019 Change-Id: Ide3c8f7172210b8d3538d3c36e88634ab1ba9041 commit ee123f535872510f77100d3d55a43d4ca56047d5 Author: Meghana Date: Mon May 27 15:36:44 2019 +0530 Defined small matrix thresholds for TRSM for various cases for NAPLES and ROME Updated copyright information for kernels/zen/bli_trsm_small.c file Removed separate kernels for zen2 architecture Instead added threshold conditions in zen kernels both for ROME and NAPLES Change-Id: Ifd715731741d649b6ad16b123a86dbd6665d97e5 commit 9d93a4caa21402d3a90aac45d7a1603736c9fd63 Author: prangana Date: Fri May 24 17:59:13 2019 +0530 update version 2.0 commit 755730608d923538273a90c48bfdf77571f86519 Author: Field G. Van Zee Date: Thu May 23 17:34:36 2019 -0500 Minor rewording of language around mt env. vars. commit ba31abe73c97c16c78fffc59a215761b8d9fd1f6 Author: Field G. Van Zee Date: Thu May 23 14:59:53 2019 -0500 Added BLIS theading info to Performance.md. Details: - Documented the BLIS environment variables that were set (e.g. BLIS_JC_NT, BLIS_IC_NT, BLIS_JR_NT) for each machine and threading configuration in order to achieve the parallelism reported on in docs/Performance.md. commit cb788ffc89cac03b44803620412a5e83450ca949 Author: Field G. Van Zee Date: Thu May 23 13:00:53 2019 -0500 Increased MT sup threshold for double to 180. Details: - Increased the double-precision real MT threshold (which controls whether the sup implementation kicks for smaller m dimension values) from 80 to 180, and this change was made for both haswell and zen subconfigurations. This is less about the m dimension in particular and more about facilitating a smoother performance transition when m = n = k. commit 057f5f3d211e7513f457ee6ca6c9555d00ad1e57 Author: Field G. Van Zee Date: Thu May 23 12:51:17 2019 -0500 Minor build system housekeeping. Details: - Commented out redundant setting of LIBBLIS_LINK within all driver- level Makefiles. This variable is already set within common.mk, and so the only time it should be overridden is if the user wants to link to a different copy of libblis. - Very minor changes to build/gen-make-frags/gen-make-frag.sh. - Whitespace and inconsequential quoting change to configure. - Moved top-level 'windows' directory into a new 'attic' directory. commit e05171118c377f356f89c4daf8a0d5ddc5a4e4f7 Author: Meghana Date: Thu May 23 16:15:27 2019 +0530 Implemented TRSM for small matrices for cases where A is on the right Added separate kernels for zen and zen2 Change-Id: I6318ddc250cf82516c1aa4732718a35eae0c9134 commit 02920f5c480c42706b487e37b5ecc96c3555b851 Author: kdevraje Date: Thu May 23 15:29:59 2019 +0530 make checkblis fails for matrix dimension check at the begining hence reverting it Change-Id: Ibd2ee8c2d4914598b72003fbfc5845be9c9c1e87 commit 84215022f29fb3bfedd254d041635308d177e6c0 Author: kdevraje Date: Thu May 23 11:08:41 2019 +0530 Adding threshold condition to dgemm small matrix kernels, defining the constants in zen2 configuration Change-Id: I53a58b5d734925a6fcb8d8bea5a02ddb8971fcd5 commit a3554eb1dcc1b5b94d81c60761b2f01c3d827ffa Merge: ea082f83 17b878b6 Author: kdevraje Date: Thu May 23 11:51:07 2019 +0530 Merge branch 'amd-staging-rome2.0' of ssh://git.amd.com:29418/cpulibraries/er/blis to configure zen2 Change-Id: I97e17bca9716b80b862925f97bb513c07b4b0cae commit ea082f839071dd9ec555062dc3851c31d12f00e4 Author: kdevraje Date: Thu May 23 10:38:29 2019 +0530 adding empty zen2 directory with .gitignore file Change-Id: Ifa37cf54b2578aa19ad335372b44bca17043fe4b commit b80bd5bcb2be8551a9a21fafc8e6c8b6336c99b5 Author: Kiran Varaganti Date: Tue May 21 15:11:47 2019 +0530 config/zen/bli_cntx_init_zen.c: removed BLIS_ENBLE_ZEN_BLOCK_SIZES macro. We have different configurations for both zen and zen2 config/zen/bli_family_zen.h: deleted macro BLIS_ENBLE_ZEN_BLOCK_SIZES config/zen/make_defs.mk: removed compiler flag -mno-avx256-split-unaligned-store frame/base/bli_cpuid.c: ROME family is 17H but model # is from 0x30H. test/test_gemm.c - commented out #define FILE_IN_OUT (some compilation error when BLIS is configured as amd64) Now we can use single configuration has ./configure amd64 - this will work both for ROME & Naples Change-Id: I91b4fc35380f8a35b4f4c345da040c6b5910b4a2 commit a042db011df9a1c3e7c7ac546541f4746b176ea5 Author: Kiran Varaganti Date: Mon May 20 14:17:32 2019 +0530 Modified make_defs.mk for zen2 to get compiled by gcc version less than gcc9.0 Change-Id: I8fcac30538ee39534c296932639053b47b9a2d43 commit a23f92594cf3d530e5794307fe97afc877d853b7 Author: Kiran Varaganti Date: Mon May 20 10:48:06 2019 +0530 config_registry: New AMD zen2 architecture configuration added. frame/base/bli_arch.c: #ifdef BLIS_FAMILY_ZEN2 id = BLIS_ARCH_ZEN2; #endif added. zen2 is added in config_name[BLIS_NUM_ARCHS] frame/base/bli_cpuid.c : #ifdef BLIS_CONFIG_ZEN2 if ( bli_cpuid_is_zen2( family, model, features ) ) return BLIS_ARCH_ZEN2; #endif, defined new function bool bli_cpuid_is_zen2(...). frame/base/bli_cpuid.h : declared bli_cpuid_is_zen2(..). frame/base/bli_gks.c : #ifdef BLIS_CONFIG_ZEN2 bli_gks_register_cntx(BLIS_ARCH_ZEN2, bli_cntx_init_zen2, bli_cntx_init_zen2_ref, bli_cntx_init_zen2_ind); #endif frame/include/bli_arch_config.h : #ifdef BLIS_CONFIG_ZEN2 CNTX_INIT_PROTS(zen2) #endif #ifdef BLIS_FAMILY_ZEN2 #include "bli_family_zen2.h" #endif frame/include/bli_type_defs.h : added BLIS_ARCH_ZEN2 in arch_t enum. BLIS_NUM_ARCHS 20 Change-Id: I2a2d9b7266673e78a4f8543b1bfb5425b0aa7866 commit 17b878b66d917d50b6fe23721d8579e826cb3e8c Author: kdevraje Date: Wed May 22 14:02:53 2019 +0530 adding license same as in ut-austin-amd-branch Change-Id: I6790768d2bf5d42369d304ef93e34701f95fbaff commit df755848b8a271323e007c7a628c64af63deab00 Merge: ca4b33c0 c72ae27a Author: kdevraje Date: Wed May 22 13:30:07 2019 +0530 Merge branch 'amd-staging-rome2.0' of ssh://git.amd.com:29418/cpulibraries/er/blis into rome2.0 Change-Id: Ie8aad1ab810f0f3c0b90ec67f9dd3dfb8dcc74cc commit c72ae27adee4726679ee004d02c972582b5285b4 Author: Nisanth M P Date: Mon Mar 19 12:49:26 2018 +0530 Re-enabling the small matrix gemm optimization for target zen Change-Id: I13872784586984634d728cd99a00f71c3f904395 commit ab0818af80f7f683080873f3fa24734b65267df2 Author: sraut Date: Wed Oct 3 15:30:33 2018 +0530 Review comments incorporated for small TRSM. Change-Id: Ia64b7b2c0375cc501c2cb0be8a1af93111808cd9 commit 32392cfc72af7f42da817a129748349fb1951346 Author: Jeff Hammond Date: Tue May 14 15:52:30 2019 -0400 add info about CXX in configure (#311) commit fa7e6b182b8365465ade178b0e4cd344ff6f6460 Author: Field G. Van Zee Date: Wed May 1 19:13:00 2019 -0500 Define _POSIX_C_SOURCE in bli_system.h. Details: - Added #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif to bli_system.h so that an application that uses BLIS (specifically, an application that #includes blis.h) does not need to remember to #define the macro itself (either on the command line or in the code that includes blis.h) in order to activate things like the pthreads. Thanks to Christos Psarras for reporting this issue and suggesting this fix. - Commented out #include in bli_system.h, since I don't think this header is used/needed anymore. - Comment update to function macro for bli_?normiv_unb_var1() in frame/util/bli_util_unb_var1.c. commit 3df84f1b5d5e1146bb01bfc466ac20c60a9cc859 Author: Field G. Van Zee Date: Sat Apr 27 21:27:32 2019 -0500 Minor bugfixes in sup dgemm implementation. Details: - Fixed an obscure but in the bli_dgemmsup_rv_haswell_asm_5x8n() kernel that only affected the beta == 0, column-storage output case. Thanks to the BLAS test drivers for catching this bug. - Previously, bli_gemmsup_ref_var1n() and _var2m() were returning if k = 0, when the correct action would be to scale by beta (and then return). Thanks to the BLAS test drivers to catching this bug. - Changed the sup threshold behavior such that the sup implementation only kicks in if a matrix dimension is strictly less than (rather than less than or equal to) the threshold in question. - Initialize all thresholds to zero (instead of 10) by default in ref_kernels/bli_cntx_ref.c. This, combined with the above change to threshold testing means that calls to BLIS or BLAS with one or more matrix dimensions of zero will no longer trigger the sup implementation. - Added disabled debugging output to frame/3/bli_l3_sup.c (for future use, perhaps). commit ecbdd1c42dcebfecd729fe351e6bb0076aba7d81 Author: Field G. Van Zee Date: Sat Apr 27 19:38:11 2019 -0500 Ceased use of BLIS_ENABLE_SUP_MR/NR_EXT macros. Details: - Removed already limited use of the BLIS_ENABLE_SUP_MR_EXT and BLIS_ENABLE_SUP_NR_EXT macros in bli_gemmsup_ref_var1n() and bli_gemmsup_ref_var2m(). Their purpose was merely to avoid a long conditional that would determine whether to allow the last iteration to be merged with the second-to-last iteration. Functionally, the macros were not needed, and they ended up causing problems when building configuration families such as intel64 and x86_64. commit aa8a6bec3036a41e1bff2034f8ef6766a704ec49 Author: Field G. Van Zee Date: Sat Apr 27 18:53:33 2019 -0500 Fixed typo in --disable-sup-handling macro guard. Details: - Fixed an incorrectly-named macro guard that is intended to allow disabling of the sup framework via the configure option --disable-sup-handling. In this case, the preprocessor macro, BLIS_DISABLE_SUP_HANDLING, was still named by its name from an older uncommitted version of the code (BLIS_DISABLE_SM_HANDLING). commit b9c9f03502c78a63cfcc21654b06e9089e2a3822 Author: Field G. Van Zee Date: Sat Apr 27 18:44:50 2019 -0500 Implemented gemm on skinny/unpacked matrices. Details: - Implemented a new sub-framework within BLIS to support the management of code and kernels that specifically target matrix problems for which at least one dimension is deemed to be small, which can result in long and skinny matrix operands that are ill-suited for the conventional level-3 implementations in BLIS. The new framework tackles the problem in two ways. First the stripped-down algorithmic loops forgo the packing that is famously performed in the classic code path. That is, the computation is performed by a new family of kernels tailored specifically for operating on the source matrices as-is (unpacked). Second, these new kernels will typically (and in the case of haswell and zen, do in fact) include separate assembly sub-kernels for handling of edge cases, which helps smooth performance when performing problems whose m and n dimension are not naturally multiples of the register blocksizes. In a reference to the sub-framework's purpose of supporting skinny/unpacked level-3 operations, the "sup" operation suffix (e.g. gemmsup) is typically used to denote a separate namespace for related code and kernels. NOTE: Since the sup framework does not perform any packing, it targets row- and column-stored matrices A, B, and C. For now, if any matrix has non-unit strides in both dimensions, the problem is computed by the conventional implementation. - Implemented the default sup handler as a front-end to two variants. bli_gemmsup_ref_var2() provides a block-panel variant (in which the 2nd loop around the microkernel iterates over n and the 1st loop iterates over m), while bli_gemmsup_ref_var1() provides a panel-block variant (2nd loop over m and 1st loop over n). However, these variants are not used by default and provided for reference only. Instead, the default sup handler calls _var2m() and _var1n(), which are similar to _var2() and _var1(), respectively, except that they defer to the sup kernel itself to iterate over the m and n dimension, respectively. In other words, these variants rely not on microkernels, but on so-called "millikernels" that iterate along m and k, or n and k. The benefit of using millikernels is a reduction of function call and related (local integer typecast) overhead as well as the ability for the kernel to know which micropanel (A or B) will change during the next iteration of the 1st loop, which allows it to focus its prefetching on that micropanel. (In _var2m()'s millikernel, the upanel of A changes while the same upanel of B is reused. In _var1n()'s, the upanel of B changes while the upanel of A is reused.) - Added a new configure option, --[en|dis]able-sup-handling, which is enabled by default. However, the default thresholds at which the default sup handler is activated are set to zero for each of the m, n, and k dimensions, which effectively disables the implementation. (The default sup handler only accepts the problem if at least one dimension is smaller than or equal to its corresponding threshold. If all dimensions are larger than their thresholds, the problem is rejected by the sup front-end and control is passed back to the conventional implementation, which proceeds normally.) - Added support to the cntx_t structure to track new fields related to the sup framework, most notably: - sup thresholds: the thresholds at which the sup handler is called. - sup handlers: the address of the function to call to implement the level-3 skinny/unpacked matrix implementation. - sup blocksizes: the register and cache blocksizes used by the sup implementation (which may be the same or different from those used by the conventional packm-based approach). - sup kernels: the kernels that the handler will use in implementing the sup functionality. - sup kernel prefs: the IO preference of the sup kernels, which may differ from the preferences of the conventional gemm microkernels' IO preferences. - Added a bool_t to the rntm_t structure that indicates whether sup handling should be enabled/disabled. This allows per-call control of whether the sup implementation is used, which is useful for test drivers that wish to switch between the conventional and sup codes without having to link to different copies of BLIS. The corresponding accessor functions for this new bool_t are defined in bli_rntm.h. - Implemented several row-preferential gemmsup kernels in a new directory, kernels/haswell/3/sup. These kernels include two general implementation types--'rd' and 'rv'--for the 6x8 base shape, with two specialized millikernels that embed the 1st loop within the kernel itself. - Added ref_kernels/3/bli_gemmsup_ref.c, which provides reference gemmsup microkernels. NOTE: These microkernels, unlike the current crop of conventional (pack-based) microkernels, do not use constant loop bounds. Additionally, their inner loop iterates over the k dimension. - Defined new typedef enums: - stor3_t: captures the effective storage combination of the level-3 problem. Valid values are BLIS_RRR, BLIS_RRC, BLIS_RCR, etc. A special value of BLIS_XXX is used to denote an arbitrary combination which, in practice, means that at least one of the operands is stored according to general stride. - threshid_t: captures each of the three dimension thresholds. - Changed bli_adjust_strides() in bli_obj.c so that bli_obj_create() can be passed "-1, -1" as a lazy request for row storage. (Note that "0, 0" is still accepted as a lazy request for column storage.) - Added support for various instructions to bli_x86_asm_macros.h, including imul, vhaddps/pd, and other instructions related to integer vectors. - Disabled the older small matrix handling code inserted by AMD in bli_gemm_front.c, since the sup framework introduced in this commit is intended to provide a more generalized solution. - Added test/sup directory, which contains standalone performance test drivers, a Makefile, a runme.sh script, and an 'octave' directory containing scripts compatible with GNU Octave. (They also may work with matlab, but if not, they are probably close to working.) - Reinterpret the storage combination string (sc_str) in the various level-3 testsuite modules (e.g. src/test_gemm.c) so that the order of each matrix storage char is "cab" rather than "abc". - Comment updates in level-3 BLAS API wrappers in frame/compat. commit 0d549ceda822833bec192bbf80633599620c15d9 Author: Isuru Fernando Date: Sat Apr 27 22:56:02 2019 +0000 make unix friendly archives on appveyor (#310) commit ca4b33c001f9e959c43b95a9a23f9df5adec7adf Author: Kiran Varaganti Date: Wed Apr 24 15:02:39 2019 +0530 Added compiler option (-mno-avx256-split-unaligned-store) in the file config/zen/make_defs.mk to improve performance of intrinsic codes, this flag ensures compiler generates 256-bit stores for the equivalent intrinsics code. Change-Id: I8f8cd81a3604869df18d38bc42097a04f178d324 commit 945928c650051c04d6900c7f4e9e29cd0e5b299f Merge: 663f6629 74e513eb Author: Field G. Van Zee Date: Wed Apr 17 15:58:56 2019 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 74e513eb6a6787a925d43cd1500277d54d86ab8f Author: Field G. Van Zee Date: Wed Apr 17 13:34:44 2019 -0500 Support row storage in Eigen gemm test/3 driver. Details: - Added preprocessor branches to test/3/test_gemm.c to explicitly support row-stored matrices. Column-stored matrices are also still supported (and is the default for now). (This is mainly residual work leftover from initial integration of Eigen into the test drivers, so if we ever want to test Eigen with row-stored matrices, the code will be ready to use, even if it is not yet integrated into the Makefile in test/3.) commit b5d457fae9bd75c4ca67f7bc7214e527aa248127 Author: Field G. Van Zee Date: Tue Apr 16 12:50:01 2019 -0500 Applied forgotten variable rename from 89a70cc. Details: - Somehow the variable name change (root_file_name -> root_inputname) in flatten-headers.py mentioned in the commit log entry for 89a70cc didn't make it into the actual commit. This commit applies that change. commit 89a70cccf869333147eb2559cdfa5a23dc915824 Author: Field G. Van Zee Date: Thu Apr 11 18:33:08 2019 -0500 GNU-like handling of installation prefix et al. Details: - Changed the default installation prefix from $HOME/lib to /usr/local. - Modified the way configure internally handles the prefix, libdir, includedir, and sharedir (and also added an --exec-prefix option). The defaults to these variables are set as follows: prefix: /usr/local exec_prefix: ${prefix} libdir: ${exec_prefix}/lib includedir: ${prefix}/include sharedir: ${prefix}/share The key change, aside from the addition of exec_prefix and its use to define the default to libdir, is that the variables are substituted into config.mk with quoting that delays evaluation, meaning the substituted values may contain unevaluated references to other variables (namely, ${prefix} and ${exec_prefix}). This more closely follows GNU conventions, including those used by GNU autoconf, and also allows make to override any one of the variables *after* configure has already been run (e.g. during 'make install'). - Updates to build/config.mk.in pursuant to above changes. - Updates to output of 'configure --help' pursuant to above changes. - Updated docs/BuildSystem.md to reflect the new default installation prefix, as well as mention EXECPREFIX and SHAREDIR. - Changed the definitions of the UNINSTALL_OLD_* variables in the top-level Makefile to use $(wildcard ...) instead of 'find'. This was motivated by the new way of handling prefix and friends, which leads to the 'find' command being run on /usr/local (by default), which can take a while almost never yielding any benefit (since the user will very rarely use the uninstall-old targets). - Removed periods from the end of descriptive output statements (i.e., non-verbose output) since those statements often end with file or directory paths, which get confusing to read when puctuated by a period. - Trival change to 'make showconfig' output. - Removed my name from 'configure --help'. (Many have contributed to it over the years.) - In configure script, changed the default state of threading_model variable from 'no' to 'off' to match that of debug_type, where there are similarly more than two valid states. ('no' is still accepted if given via the --enable-debug= option, though it will be standardized to 'off' prior to config.mk being written out.) - Minor variable name change in flatten-headers.py that was intended for 32812ff. - CREDITS file update. commit 9d76688ad90014a11ddc0c2f27253d62806216b1 Author: kdevraje Date: Thu Apr 11 10:22:48 2019 +0530 Fix for single rank crash with HPL application. When computing offset of C buffer, as integer variables are used for a row and column index, the intermediate result value overflows and a negative value gets added to the buffer, when the negative value is too large it would index the buffer out of the range resulting in segmentation fault. Although the crash is a result of dgemm kernel, added similar code in sgemm kernel also. Change-Id: I171119b0ec0dfbd8e63f1fcd6609a94384aabd27 commit 32812ff5aba05d34c421fe1024a61f3e2d5e7052 Author: Field G. Van Zee Date: Tue Apr 9 12:20:19 2019 -0500 Minor bugfix to flatten-headers.py. Details: - Fixed a minor bug in flatten-headers.py whereby the script, upon encountering a #include directive for the root header file, would erroneously recurse and inline the conents of that root header. The script has been modified to avoid recursion into any headers that share the same name as the root-level header that was passed into the script. (Note: this bug didn't actually manifest in BLIS, so it's merely a precaution for usage of flatten-headers.py in other contexts.) commit bec90e0b6aeb3c9b19589c2b700fda2d66f6ccdf Author: Field G. Van Zee Date: Tue Apr 2 17:45:13 2019 -0500 Minor update to docs/HardwareSupport.md document. Details: - Added more details and clarifying language to implications of 1m and the recycling of microkernels between microarchitectures. commit 89cd650e7be01b59aefaa85885a3ea78970351e4 Author: Field G. Van Zee Date: Tue Apr 2 17:23:55 2019 -0500 Use void_fp for function pointers instead of void*. Change void*-typed function pointers to void_fp. - Updated all instances of void* variables that store function pointers to variables of a new type, void_fp. Originally, I wanted to define the type of void_fp as "void (*void_fp)( void )"--that is, a pointer to a function with no return value and no arguments. However, once I did this, I realized that gcc complains with incompatible pointer type (-Wincompatible-pointer-types) warnings every time any such a pointer is being assigned to its final, type-accurate function pointer type. That is, gcc will silently typecast a void* to another defined function pointer type (e.g. dscalv_ker_ft) during an assignment from the former to the latter, but the same statement will trigger a warning when typecasting from a void_fp type. I suspect an explicit typecast is needed in order to avoid the warning, which I'm not willing to insert at this time. - Added a typedef to bli_type_defs.h defining void_fp as void*, along with a commented-out version of the aborted definition described above. (Note that POSIX requires that void* and function pointers be interchangeable; it is the C standard that does not provide this guarantee.) - Comment updates to various _oapi.c files. commit ffce3d632b284eb52474036096815ec38ca8dd5f Author: Field G. Van Zee Date: Tue Apr 2 14:40:50 2019 -0500 Renamed armv8a gemm kernel filename. Details: - Renamed kernels/armv8a/3/bli_gemm_armv8a_opt_4x4.c to kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c. This follows the naming convention used by other kernel sets, most notably haswell. commit 77867478af02144544b4e7b6df5d54d874f3f93b Author: Isuru Fernando Date: Tue Apr 2 13:33:11 2019 -0500 Use pthreads on MinGW and Cygwin (#307) commit 7bc75882f02ce3470a357950878492e87e688cec Author: Field G. Van Zee Date: Thu Mar 28 17:40:50 2019 -0500 Updated Eigen results in docs/graphs with 3.3.90. Details: - Updated the level-3 performance graphs in docs/graphs with new Eigen results, this time using a development version cloned from their git mirror on March 27, 2019 (version 3.3.90). Performance is improved over 3.3.7, though still noticeably short of BLIS/MKL in most cases. - Very minor updates to docs/Performance.md and matlab scripts in test/3/matlab. commit 20ea7a1217d3833db89a96158c42da2d6e968ed8 Author: Field G. Van Zee Date: Wed Mar 27 18:09:17 2019 -0500 Minor text updates (Eigen) to docs/Performance.md. Details: - Added/updated a few more details, mostly regarding Eigen. commit bfb7e1bc6af468e4ff22f7e27151ea400dcd318a Merge: 044df950 2c85e1dd Author: Field G. Van Zee Date: Wed Mar 27 17:58:19 2019 -0500 Merge branch 'dev' commit 2c85e1dd9d5d84da7228ea4ae6deec56a89b3a8f Author: Field G. Van Zee Date: Wed Mar 27 16:29:51 2019 -0500 Added Eigen results to performance graphs. Details: - Updated the Haswell, SkylakeX, and Epyc performance graphs in docs/graphs to report on Eigen implementations, where applicable. Specifically, Eigen implements all level-3 operations sequentially, however, of those operations it only provides multithreaded gemm. Thus, mt results for symm/hemm, syrk/herk, trmm, and trsm are omitted. Thanks to Sameer Agarwal for his help configuring and using Eigen. - Updated docs/Performance.md to note the new implementation tested. - CREDITS file update. commit bfac7e385f8061f2e6591de208b0acf852f04580 Author: Field G. Van Zee Date: Wed Mar 27 16:04:48 2019 -0500 Added ability to plot with Eigen in test/3/matlab. Details: - Updated matlab scripts in test/3/matlab to optionally plot/display Eigen performance curves. Whether Eigen is plotted is determined by a new boolean function parameter, with_eigen. - Updated runme.m scratchpad to reflect the latest invocations of the plot_panel_4x5() function (with Eigen plotting enabled). commit 67535317b9411c90de7fa4cb5b0fdb8f61fdcd79 Author: Field G. Van Zee Date: Wed Mar 27 13:32:18 2019 -0500 Fixed mislabeled eigen output from test/3 drivers. Details: - Fixed the Makefile in test/3 so that it no longer incorrectly labels the matlab output variables from Eigen-linked hemm, herk, trmm, and trsm driver output as "vendor". (The gemm drivers were already correctly outputing matlab variables containing the "eigen" label.) commit 044df9506f823643c0cdd53e81ad3c27a9f9d4ff Author: Isuru Fernando Date: Wed Mar 27 12:39:31 2019 -0500 Test with shared on windows (#306) Export macros can't support both shared and static at the same time. When blis is built with both shared and static, headers assume that shared is used at link time and dllimports the symbols with __imp_ prefix. To use the headers with static libraries a user can give -DBLIS_EXPORT= to import the symbol without the __imp_ prefix commit 5e6b160c8a85e5e23bab0f64958a8acf4918a4ed Author: Field G. Van Zee Date: Tue Mar 26 19:10:59 2019 -0500 Link to Eigen BLAS for non-gemm drivers in test/3. Details: - Adjusted test/3/Makefile so that the test drivers are linked against Eigen's BLAS library for hemm, herk, trmm, and trsm. We have to do this since Eigen's headers don't define implementations to the standard BLAS APIs. - Simplified #included headers in hemm, herk, trmm, and trsm source driver files, since nothing specific to Eigen is needed at compile-time for those operations. commit e593221383aae19dfdc3f30539de80ed05cfec7f Merge: 92fb9c87 c208b9dc Author: Field G. Van Zee Date: Tue Mar 26 15:51:45 2019 -0500 Merge branch 'master' into dev commit 92fb9c87bf88b9f9c401eeecd9aa9c3521bc2adb Author: Field G. Van Zee Date: Tue Mar 26 15:43:23 2019 -0500 Add more support for Eigen to drivers in test/3. Details: - Use compile-time implementations of Eigen in test_gemm.c via new EIGEN cpp macro, defined on command line. (Linking to Eigen's BLAS library is not necessary.) However, as of Eigen 3.3.7, Eigen only parallelizes the gemm operation and not hemm, herk, trmm, trsm, or any other level-3 operation. - Fixed a bug in trmm and trsm drivers whereby the wrong function (bli_does_trans()) was being called to determine whether the object for matrix A should be created for a left- or right-side case. This was corrected by changing the function to bli_is_left(), as is done in the hemm driver. - Added support for running Eigen test drivers from runme.sh. commit c208b9dc46852c877197d53b6dd913a046b6ebb6 Author: Isuru Fernando Date: Mon Mar 25 13:03:44 2019 -0500 Fix clang version detection (#305) clang -dumpversion gives 4.2.1 for all clang versions as clang was originally compatible with gcc 4.2.1 Apple clang version and clang version are two different things and the real clang version cannot be deduced from apple clang version programatically. Rely on wikipedia to map apple clang to clang version Also fixes assembly detection with clang clang 3.8 can't build knl as it doesn't recognize zmm0 commit 53842c7e7d530cb2d5609d6d124ae350fc345c32 Author: Kiran Varaganti Date: Fri Mar 22 13:57:14 2019 +0530 Removed printing alpha and beta values Change-Id: I49102db510311a30f6a936f9d843f35838f50d23 commit 6805db45e343d83d1adaf9157cf0b841653e9ede Author: Kiran Varaganti Date: Fri Mar 22 12:55:35 2019 +0530 Corrected setting alpha & beta values- alpha = -1 and beta = 1 - bli_setc(-1.0, 0, &alpha) should be used rather than bli_setc(0.0, -1.0, &alpha). This corrected now Change-Id: Ic1102dfd6b50ccf212386a1211c6f31e8d987ef9 commit feefcab4427a75b0b55af215486b85abcda314f7 Author: Field G. Van Zee Date: Thu Mar 21 18:11:20 2019 -0500 Allow disabling of BLAS prototypes at compile-time. Details: - Modified bli_blas.h so that: - By default, if the BLAS layer is enabled at configure-time, BLAS prototypes are also enabled within blis.h; - But if the user #defines BLIS_DISABLE_BLAS_DEFS prior to including blis.h, BLAS prototypes are skipped over entirely so that, for example, the application or some other header pulled in by the application may prototype the BLAS functions without causing any duplication. - Updated docs/BuildSystem.md to document the feature above, and related text. commit 20153cd4b594bc34f860c381ec18de3a6cc743c7 Author: Kiran Varaganti Date: Thu Mar 21 16:23:53 2019 +0530 Modified test_gemm.c file in test folder A Macro 'FILE_IN_OUT" is defined to read input parameters from a csv file. Format for input file: Each line defines a gemm problem with following parameters: m k n cs_a cs_b cs_c The operation always implemented is C = C - A*B and column-major format. When macro is disabled - it reverts back to original implementation. Usage: ./test_gemm_.x input.csv output.csv GEMM is called through BLAS interface For BLIS - the test application also prints either 'S' indicating small gemm routine or 'N' - conventional BLIS gemm for MKL/OpenBLAS - ignore this character Change-Id: I0924ef2c1f7bdea48d4cdb230b888e2af2c86a36 commit 288843b06d91e1b4fade337959aef773090bd1c9 Author: Field G. Van Zee Date: Wed Mar 20 17:52:23 2019 -0500 Added Eigen support to test/3 Makefile, runme.sh. Details: - Added targets to test/3/Makefile that link against a BLAS library build by Eigen. It appears, however, that Eigen's BLAS library does not support multithreading. (It may be that multithreading is only available when using the native C++ APIs.) - Updated runme.sh with a few Eigen-related tweaks. - Minor tweaks to docs/Performance.md. commit 153e0be21d9ff413e370511b68d553dd02abada9 Author: Field G. Van Zee Date: Tue Mar 19 17:53:18 2019 -0500 More minor tweaks to docs/Performance.md. Details: - Defined GFLOPS as billions of floating-point operations per second, and reworded the sentence after about normalization. commit 05c4e42642cc0c8dbfa94a6c21e975ac30c0517a Author: Field G. Van Zee Date: Tue Mar 19 17:07:20 2019 -0500 CHANGELOG update (0.5.2) commit 9204cd0cb0cc27790b8b5a2deb0233acd9edeb9b Author: Field G. Van Zee Date: Tue Mar 19 17:07:18 2019 -0500 Version file update (0.5.2) commit 64560cd9248ebf4c02c4a1eeef958e1ca434e510 Author: Field G. Van Zee Date: Tue Mar 19 17:04:20 2019 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. commit ab5ad557ea69479d487c9a3cb516f43fa1089863 Author: Field G. Van Zee Date: Tue Mar 19 16:50:41 2019 -0500 Very minor tweaks to Performance.md. commit 03c4a25e1aa8a6c21abbb789baa599ac419c3641 Author: Field G. Van Zee Date: Tue Mar 19 16:47:15 2019 -0500 Minor fixes to docs/Performance.md. Details: - Fixed some incorrect labels associated with the pdf/png graphs, apparently the result of copy-pasting. commit fe6dd8b132f39ecb8893d54cd8e75d4bbf6dab83 Author: Field G. Van Zee Date: Tue Mar 19 16:30:23 2019 -0500 Fixed broken section links in docs/Performance.md. Details: - Fixed a few broken section links in the Contents section. commit 913cf97653f5f9a40aa89a5b79e2b0a8882dd509 Author: Field G. Van Zee Date: Tue Mar 19 16:15:24 2019 -0500 Added docs/Performance.md and docs/graphs subdir. Details: - Added a new markdown document, docs/Performance.md, which reports performance of a representative set of level-3 operations across a variety of hardware architectures, comparing BLIS to OpenBLAS and a vendor library (MKL on Intel/AMD, ARMPL on ARM). Performance graphs, in pdf and png formats, reside in docs/graphs. - Updated README.md to link to new Performance.md document. - Minor updates to CREDITS, docs/Multithreading.md. - Minor updates to matlab scripts in test/3/matlab. commit 9945ef24fd758396b698b19bb4e23e53b9d95725 Author: Field G. Van Zee Date: Tue Mar 19 15:28:44 2019 -0500 Adjusted cache blocksizes for zen subconfig. Details: - Adjusted the zen sub-configuration's cache blocksizes for float, scomplex, and dcomplex based on the existing values for double. (The previous values were taken directly from the haswell subconfig, which targets Intel Haswell/Broadwell/Skylake systems.) commit d202d008d51251609d08d3c278bb6f4ca9caf8e4 Author: Field G. Van Zee Date: Mon Mar 18 18:18:25 2019 -0500 Renamed --enable-export-all to --export-shared=[]. Details: - Replaced the existing --enable-export-all / --disable-export-all configure option with --export-shared=[public|all], with the 'public' instance of the latter corresponding to --disable-export-all and the 'all' instance corresponding to --enable-export-all. Nothing else semantically about the option, or its default, has changed. commit ff78089870f714663026a7136e696603b5259560 Author: Field G. Van Zee Date: Mon Mar 18 13:22:55 2019 -0500 Updates to docs/Multithreading.md. Details: - Made extra explicit the fact that: (a) multithreading in BLIS is disabled by default; and (b) even with multithreading enabled, the user must specify multithreading at runtime in order to observe parallelism. Thanks to M. Zhou for suggesting these clarifications in #292. - Also made explicit that only the environment variable and global runtime API methods are available when using the BLAS API. If the user wishes to use the local runtime API (specify multithreading on a per-call basis), one of the native BLIS APIs must be used. commit 3a929a3d0ba0353159a6d4cd188f01b7a390ccfc Author: Kiran Varaganti Date: Mon Mar 18 10:51:41 2019 +0530 Fixed code merging: bli_gemm_small.c - missed conditional checks for L!=0 && K!=0. Now they are added. This fix is done to pass blastest Change-Id: Idc9c9a04d2015a68a19553c437ecaf8f1584026c commit 663f662932c3f182fefc3c77daa1bf8c3394bb8b Merge: 938c05ef 6bfe3812 Author: Field G. Van Zee Date: Sat Mar 16 16:17:12 2019 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 938c05ef8654e2fc013d39a57f51d91d40cc40fb Merge: 4ed39c09 5a5f494e Author: Field G. Van Zee Date: Sat Mar 16 16:01:43 2019 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 6bfe3812e29b86c95b828822e4e5473b48891167 Author: Field G. Van Zee Date: Fri Mar 15 13:57:49 2019 -0500 Use -fvisibility=[...] with clang on Linux/BSD/OSX. Details: - Modified common.mk to use the -fvisibility=[hidden|default] option when compiling with clang on non-Windows platforms (Linux, BSD, OS X, etc.). Thanks to Isuru Fernando for pointing out this option works with clang on these OSes. commit 809395649c5bbf48778ede4c03c1df705dd49566 Author: Field G. Van Zee Date: Wed Mar 13 18:21:35 2019 -0500 Annotated additional symbols for export. Details: - Added export annotations to additional function prototypes in order to accommodate the testsuite. - Disabled calling bli_amaxv_check() from within the testsuite's test_amaxv.c. commit e095926c643fd9c9c2220ebecd749caae0f71d42 Author: Field G. Van Zee Date: Wed Mar 13 17:35:18 2019 -0500 Support shared lib export of only public symbols. Details: - Introduced a new configure option, --enable-export-all, which will cause all shared library symbols to be exported by default, or, alternatively, --disable-export-all, which will cause all symbols to be hidden by default, with only those symbols that are annotated for visibility, via BLIS_EXPORT_BLIS (and BLIS_EXPORT_BLAS for BLAS symbols), to be exported. The default for this configure option is --disable-export-all. Thanks to Isuru Fernando for consulting on this commit. - Removed BLIS_EXPORT_BLIS annotations from frame/1m/bli_l1m_unb_var1.h, which was intended for 5a5f494. - Relocated BLIS_EXPORT-related cpp logic from bli_config.h.in to frame/include/bli_config_macro_defs.h. - Provided appropriate logic within common.mk to implement variable symbol visibility for gcc, clang, and icc (to the extend that each of these compilers allow). - Relocated --help text associated with debug option (-d) to configure slightly further down in the list. commit 5a5f494e428372c7c27ed1f14802e15a83221e87 Author: Field G. Van Zee Date: Tue Mar 12 18:45:09 2019 -0500 Removed export macros from all internal prototypes. Details: - After merging PR #303, at Isuru's request, I removed the use of BLIS_EXPORT_BLIS from all function prototypes *except* those that we potentially wish to be exported in shared/dynamic libraries. In other words, I removed the use of BLIS_EXPORT_BLIS from all prototypes of functions that can be considered private or for internal use only. This is likely the last big modification along the path towards implementing the functionality spelled out in issue #248. Thanks again to Isuru Fernando for his initial efforts of sprinkling the export macros throughout BLIS, which made removing them where necessary relatively painless. Also, I'd like to thank Tony Kelman, Nathaniel Smith, Ian Henriksen, Marat Dukhan, and Matthew Brett for participating in the initial discussion in issue #37 that was later summarized and restated in issue #248. - CREDITS file update. commit 3dc18920b6226026406f1d2a8b2c2b405a2649d5 Merge: b938c16b 766769ee Author: Field G. Van Zee Date: Tue Mar 12 11:20:25 2019 -0500 Merge branch 'master' into dev commit 766769eeb944bd28641a6f72c49a734da20da755 Author: Isuru Fernando Date: Mon Mar 11 19:05:32 2019 -0500 Export functions without def file (#303) * Revert "restore bli_extern_defs exporting for now" This reverts commit 09fb07c350b2acee17645e8e9e1b8d829c73dca8. * Remove symbols not intended to be public * No need of def file anymore * Fix whitespace * No need of configure option * Remove export macro from definitions * Remove blas export macro from definitions commit 4ed39c0971c7917e2675cf5449f563b1f4751ccc Merge: 540ec1b4 b938c16b Author: Field G. Van Zee Date: Fri Mar 8 11:56:58 2019 -0600 Merge branch 'amd' of github.com:flame/blis into amd commit b938c16b0c9e839335ac2c14944b82890143d02f Author: Field G. Van Zee Date: Thu Mar 7 16:40:39 2019 -0600 Renamed test/3m4m to test/3. Details: - Renamed '3m4m' directory to '3', which captures the directory nicely since it builds test drivers to test level-3 operations. - These test drivers ceased to be used to test the 3m and 4m (or even 1m) induced methods long ago, hence the name change. commit ab89a40582ec7acf802e59b0763bed099a02edd8 Author: Field G. Van Zee Date: Thu Mar 7 16:26:12 2019 -0600 More minor updates and edits to test/3m4m. Details: - Further updates to matlab scripts, mostly for compatibility with GNU Octave. - More tweaks to runme.sh. - Updates to runme.m that allow copy-paste into matlab interactive session to generate graphs. commit f0e70dfbf3fee4c4e382c2c4e87c25454cbc79a1 Author: Field G. Van Zee Date: Thu Mar 7 01:04:05 2019 +0000 Very minor updates to test/3m4m for ul252. Details: - Very minor updates to the newly revamped test/3m4m drivers when used on a Xeon Platinum (SkylakeX). commit 7fe44748383071f1cbbc77d904f4ae5538e13065 Author: Kiran Varaganti Date: Wed Mar 6 16:23:31 2019 +0530 Disabled BLIS_ENABLE_ZEN_BLOCK_SIZES in bli_family_zen.h for ROME tuning Change-Id: Iec47fcf51f4d4396afef1ce3958e58cf02c59a57 commit 9f1dbe572b1fd5e7dd30d5649bdf59259ad770d5 Author: Field G. Van Zee Date: Tue Mar 5 17:47:55 2019 -0600 Overhauled test/3m4m Makefile and scripts. Details: - Rewrote much of Makefile to generate executables for single- and dual- socket multithreading as well as single-threaded. Each of the three can also use a different problem size range/increment, as is often appropriate when doubling/halving the number of threads. - Rewrote runme.sh script to flexibly execute as many threading parameter scenarios as is given in the input parameter string (currently set within the script itself). The string also encodes the maximum problem size for each threading scenario, which is used to identify the executable to run. Also improved the "progress" output of the script to reduce redundant info and improve readability in terminals that are not especially wide. - Minor updates to test_*.c source files. - Updated matlab scripts according to changes made to the Makefile, test drivers, and runme.sh script, and renamed 'plot_all.m' to 'runme.m'. commit f5ed95ecd7d5eb4a63e1333ad5cc6765fc8df9fe Author: Kiran Varaganti Date: Tue Mar 5 15:01:57 2019 +0530 Merged BLIS Release 1.3 Modified config/zen/make_defs.mk, now CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=znver1 Change-Id: Ia0942d285a21447cd0c470de1bc021fe63e80d81 commit 3bdab823fa93342895bf45d812439324a37db77c Merge: 70f12f20 e2a02ebd Author: Field G. Van Zee Date: Thu Feb 28 14:07:24 2019 -0600 Merge branch 'master' into dev commit e2a02ebd005503c63138d48a2b7d18978ee29205 Author: Field G. Van Zee Date: Thu Feb 28 13:58:59 2019 -0600 Updates (from ls5) to test/3m4m/runme.sh. Details: - Lonestar5-specific updates to runme.sh. commit f0dcc8944fa379d53770f5cae5d670140918f00c Author: Isuru Fernando Date: Wed Feb 27 17:27:23 2019 -0600 Add symbol export macro for all functions (#302) * initial export of blis functions * Regenerate def file for master * restore bli_extern_defs exporting for now commit 540ec1b479712d5e1da637a718927249c15d867f Author: Field G. Van Zee Date: Sun Feb 24 19:09:10 2019 -0600 Updated level-3 BLAS to call object API directly. Details: - Updated the BLAS compatibility layer for level-3 operations so that the corresponding BLIS object API is called directly rather than first calling the typed BLIS API. The previous code based on the typed BLIS API calls is still available in a deactivated cpp macro branch, which may be re-activated by #defining BLIS_BLAS3_CALLS_TAPI. (This does not yet correspond to a configure option. If it seems like people might want to toggle this behavior more regularly, a configure option can be added in the future.) - Updated the BLIS typed API to statically "pre-initialize" objects via new initializor macros. Initialization is then finished via calls to static functions bli_obj_init_finish_1x1() and bli_obj_init_finish(), which are similar to the previously-called functions, bli_obj_create_1x1_with_attached_buffer() and bli_obj_create_with_attached_buffer(), respectively. (The BLAS compatibility layer updates mentioned above employ this new technique as well.) - Transformed certain routines in bli_param_map.c--specifically, the ones that convert netlib-style parameters to BLIS equivalents--into static functions, now in bli_param_map.h. (The remaining three classes of conversation routines were left unchanged.) - Added the aforementioned pre-initializor macros to bli_type_defs.h. - Relocated bli_obj_init_const() and bli_obj_init_constdata() from bli_obj_macro_defs.h to bli_type_defs.h. - Added a few macros to bli_param_macro_defs.h for testing domains for real/complexness and precisions for single/double-ness. commit 8e023bc914e9b4ac1f13614feb360b105fbe44d2 Author: Field G. Van Zee Date: Fri Feb 22 16:55:30 2019 -0600 Updates to 3m4m/matlab scripts. Details: - Minor updates to matlab graph-generating scripts. - Added a plot_all.m script that is more of a scratchpad for copying and pasting function invocations into matlab to generate plots that are presently of interest to us. commit b06244d98cc468346eb1a8eb931bc05f35ff280c Merge: e938ff08 4c7e6680 Author: praveeng Date: Thu Feb 21 12:56:15 2019 +0530 Merge branch 'ut-austin-amd' of ssh://git.amd.com:29418/cpulibraries/er/blis into ut-austin-amd commit e938ff08cea3d108c84524eb129d9e89d701ea90 Author: praveeng Date: Thu Feb 21 12:44:38 2019 +0530 deleted test.txt Change-Id: I3871f5fe76e548bc29ec2733745b29964e829dd3 commit ed13ad465dcba350ad3d5e16c9cc7542e33f3760 Author: mkv Date: Thu Feb 21 01:04:16 2019 -0500 added test file for initial commit commit 4c7e6680832b497468cf50c2399e3ac4de0e3450 Author: praveeng Date: Thu Feb 21 12:44:38 2019 +0530 deleted test.txt Change-Id: I3871f5fe76e548bc29ec2733745b29964e829dd3 commit 95e070581c54ed2edc211874faec56055ea298c8 Author: mkv Date: Thu Feb 21 01:04:16 2019 -0500 added test file for initial commit commit 70f12f209bc1901b5205902503707134cf2991a0 Author: Field G. Van Zee Date: Wed Feb 20 16:10:10 2019 -0600 Changed unsafe-loop to unsafe-math optimizations. Details: - Changed -funsafe-loop-optimizations (re-)introduced in 7690855 for make_defs.mk files' CRVECFLAGS to -funsafe-math-optimizations (to account for a miscommunication in issue #300). Thanks to Dave Love for this suggestion and Jeff Hammond for his feedback on the topic. commit 7690855c5106a56e5b341a350f8db1c78caacd89 Author: Field G. Van Zee Date: Mon Feb 18 19:16:01 2019 -0600 Restored -funsafe-loop-optimizations to subconfigs. Details: - Restored use of -funsafe-loop-optimizations in the definitions of CRVECFLAGS (when using gcc), but only for sub-configurations (and not configuration families such as amd64, intel64, and x86_64). This more or less reverts 5190d05 and 6cf1550. commit 44994d1490897b08cde52a615a2e37ddae8b2061 Author: Field G. Van Zee Date: Mon Feb 18 18:35:30 2019 -0600 Disable TBM, XOP, LWP instructions in AMD configs. Details: - Added -mno-tbm -mno-xop -mno-lwp to CKVECFLAGS in bulldozer, piledriver, steamroller, and excavator configurations to explicitly disable AMD's bulldozer-era TBM, XOP, and LWP instruction sets in an attempt to fix the invalid instruction error that has plagued Travis CI builds since 6a014a3. Thanks to Devin Matthews for pointing out that the offending instruction was part of TBM (issue #300). - Restored -O3 to piledriver configuration's COPTFLAGS. commit 1e5b530744c1906140d47f43c5cad235eaa619cf Author: Field G. Van Zee Date: Mon Feb 18 18:04:38 2019 -0600 Reverted piledriver COPTFLAGS from -O3 to -O2. Details: - Debugging continues; changing COPTFLAGS for piledriver subconfig from -O3 to -O2, its original value prior to 6a014a3. commit 6cf155049168652c512aefdd16d74e7ff39b98df Author: Field G. Van Zee Date: Mon Feb 18 17:29:51 2019 -0600 Removed -funsafe-loop-optimizations from all configs. Details: - Error persists. Removed -funsafe-loop-optimizations from all remaining sub-configurations. commit 5190d05a27c5fa4c7942e20094f76eb9a9785c3e Author: Field G. Van Zee Date: Mon Feb 18 17:07:35 2019 -0600 Removed -funsafe-loop-optimizations from piledriver. Details: - Error persists; continuing debugging from bf0fb78c by removing -funsafe-loop-optimizations from piledriver configuration. commit bf0fb78c5e575372060d22f5ceeb5b332e8978ec Author: Field G. Van Zee Date: Mon Feb 18 16:51:38 2019 -0600 Removed -funsafe-loop-optimizations from families. Details: - Removed -funsafe-loop-optimizations from the configuration families affected by 6a014a3, specifically: intel64, amd64, and x86_64. This is part of an attempt to debug why the sde, as executed by Travis CI, is crashing via the following error: TID 0 SDE-ERROR: Executed instruction not valid for specified chip (ICELAKE): 0x9172a5: bextr_xop rax, rcx, 0x103 commit 6a014a3377a2e829dbc294b814ca257a2bfcb763 Author: Field G. Van Zee Date: Mon Feb 18 14:52:29 2019 -0600 Standardized optimization flags in make_defs.mk. Details: - Per Dave Love's recommendation in issue #300, this commit defines COPTFLAGS := -03 and CRVECFLAGS := $(CKVECFLAGS) -funsafe-loop-optimizations in the make_defs.mk for all Intel- and AMD-based configurations. commit 565fa3853b381051ac92cff764625909d105644d Author: Field G. Van Zee Date: Mon Feb 18 11:43:58 2019 -0600 Redirect trsm pc, ir parallelism to ic, jr loops. Details: - trsm parallelization was temporarily simplifed in 075143d to entirely ignore any parallelism specified via the pc or ir loops. Now, any parallelism specified to the pc loop will be redirected to the ic loop, and any parallelism specified to the ir loop will be redirected to the jr loop. (Note that because of inter-iteration dependencies, trsm cannot parallelize the ir loop. Parallelism via the pc loop is at least somewhat feasible in theory, but it would require tracking dependencies between blocks--something for which BLIS currently lacks the necessary supporting infrastructure.) commit a023c643f25222593f4c98c2166212561d030621 Author: Field G. Van Zee Date: Thu Feb 14 20:18:55 2019 -0600 Regenerated symbols in build/libblis-symbols.def. Details: - Reran ./build/regen-symbols.sh after running 'configure --enable-cblas auto' commit 075143dfd92194647da9022c1a58511b20fc11f3 Author: Field G. Van Zee Date: Thu Feb 14 18:52:45 2019 -0600 Added support for IC loop parallelism to trsm. Details: - Parallelism within the IC loop (3rd loop around the microkernel) is now supported within the trsm operation. This is done via a new branch on each of the control and thread trees, which guide execution of a new trsm-only subproblem from within bli_trsm_blk_var1(). This trsm subproblem corresponds to the macrokernel computation on only the block of A that contains the diagonal (labeled as A11 in algorithms with FLAME-like partitioning), and the corresponding row panel of C. During the trsm subproblem, all threads within the JC communicator participate and parallelize along the JR loop, including any parallelism that was specified for the IC loop. (IR loop parallelism is not supported for trsm due to inter-iteration dependencies.) After this trsm subproblem is complete, a barrier synchronizes all participating threads and then they proceed to apply the prescribed BLIS_IC_NT (or equivalent) ways of parallelism (and any BLIS_JR_NT parallelism specified within) to the remaining gemm subproblem (the rank-k update that is performed using the newly updated row-panel of B). Thus, trsm now supports JC, IC, and JR loop parallelism. - Modified bli_trsm_l_cntl_create() to create the new "prenode" branch of the trsm_l cntl_t tree. The trsm_r tree was left unchanged, for now, since it is not currently used. (All trsm problems are cast in terms of left-side trsm.) - Updated bli_cntl_free_w_thrinfo() to be able to free the newly shaped trsm cntl_t trees. Fixed a potentially latent bug whereby a cntl_t subnode is only recursed upon if there existed a corresponding thrinfo_t node, which may not always exist (for problems too small to employ full parallelization due to the minimum granularity imposed by micropanels). - Updated other functions in frame/base/bli_cntl.c, such as bli_cntl_copy() and bli_cntl_mark_family(), to recurse on sub-prenodes if they exist. - Updated bli_thrinfo_free() to recurse into sub-nodes and prenodes when they exist, and added support for growing a prenode branch to bli_thrinfo_grow() via a corresponding set of help functions named with the _prenode() suffix. - Added a bszid_t field thrinfo_t nodes. This field comes in handy when debugging the allocation/release of thrinfo_t nodes, as it helps trace the "identity" of each nodes as it is created/destroyed. - Renamed bli_l3_thrinfo_print_paths() -> bli_l3_thrinfo_print_gemm_paths() and created a separate bli_l3_thrinfo_print_trsm_paths() function to print out the newly reconfigured thrinfo_t trees for the trsm operation. - Trival changes to bli_gemm_blk_var?.c and bli_trsm_blk_var?.c regarding variable declarations. - Removed subpart_t enum values BLIS_SUBPART1T, BLIS_SUBPART1B, BLIS_SUBPART1L, BLIS_SUBPART1R. Then added support for two new labels (semantically speaking): BLIS_SUBPART1A and BLIS_SUBPART1B, which represent the subpartition ahead of and behind, respectively, BLIS_SUBPART1. Updated check functions in bli_check.c accordingly. - Shuffled layering/APIs for bli_acquire_mpart_[mn]dim() and bli_acquire_mpart_t2b/b2t(), _l2r/r2l(). - Deprecated old functions in frame/3/bli_l3_thrinfo.c. commit 78bc0bc8b6b528c79b11f81ea19250a1db7450ed Author: Nicholai Tukanov Date: Thu Feb 14 13:29:02 2019 -0600 Power9 sub-configuration (#298) Formally registered power9 sub-configuration. Details: - Added and registered power9 sub-configuration into the build system. Thanks to Nicholai Tukanov and Devangi Parikh for these contributions. - Note: The sub-configuration does not yet have a corresponding architecture-specific kernel set registered, and so for now the sub-config is using the generic kernel set. commit 6b832731261f9e7ad003a9ea4682e9ca973ef844 Author: Field G. Van Zee Date: Tue Feb 12 16:01:28 2019 -0600 Generalized ref kernels' pragma omp simd usage. Details: - Replaced direct usage of _Pragma( "omp simd" ) in reference kernels with PRAGMA_SIMD, which is defined as a function of the compiler being used in a new bli_pragma_macro_defs.h file. That definition is cleared when BLIS detects that the -fopenmp-simd command line option is unsupported. Thanks to Devin Matthews and Jeff Hammond for suggestions that guided this commit. - Updated configure and bli_config.h.in so that the appropriate anchor is substituted in (when the corresponding pragma omp simd support is present). commit b1f5ce8622b682b79f956fed83f04a60daa8e0fc Author: Field G. Van Zee Date: Tue Feb 5 17:38:50 2019 -0600 Minor updates to scripts in test/mixeddt/matlab. commit 38203ecd15b1fa50897d733daeac6850d254e581 Author: Devangi N. Parikh Date: Mon Feb 4 15:28:28 2019 -0500 Added thunderx2 system in the mixeddt test scripts Details: - Added thunderx2 (tx2) as a system in the runme.sh in test/mixeddt commit dfc91843ea52297bf636147793029a0c1345be04 Author: Devangi N. Parikh Date: Mon Feb 4 15:23:40 2019 -0500 Fixed gcc flags for thunderx2 subconfiguration Details: - Fixed -march flag. Thunderx2 is an armv8.1a architecture not armv8a. commit c665eb9b888ec7e41bd0a28c4c8ac4094d0a01b5 Author: Field G. Van Zee Date: Mon Jan 28 16:22:23 2019 -0600 Minor updates to docs, Makefiles. Details: - Changed all occurrances of micro-kernel -> microkernel macro-kernel -> macrokernel micro-panel -> micropanel in all markdown documents in 'docs' directory. This change is being made since we've reached the point in adoption and acceptance of BLIS's insights where words such as "microkernel" are no longer new, and therefore now merit being unhyphenated. - Updated "Implementation Notes" sections of KernelsHowTo.md, which still contained references to nonexistent cpp macros such as BLIS_DEFAULT_MR_? and BLIS_PACKDIM_MR_?. - Added 'run-fast' and 'check-fast' targets to testsuite/Makefile. - Minor updates to Testsuite.md, including suggesting use of 'make check' and 'make check-fast' when running from the local testsuite directory. - Added a comment to top-level Makefile explaining the purpose behind the TESTSUITE_WRAPPER variable, which at first glance appears to serve no purpose. commit 1aa280d0520ed5eaea3b119b4e92b789ecad78a4 Author: M. Zhou <5723047+cdluminate@users.noreply.github.com> Date: Sun Jan 27 21:40:48 2019 +0000 Amend OS detection for kFreeBSD. (#295) commit fffc23bb35d117a433886eb52ee684ff5cf6997f Author: Field G. Van Zee Date: Fri Jan 25 13:35:31 2019 -0600 CREDITS file update. commit 26c5cf495ce22521af5a36a1012491213d5a4551 Author: Field G. Van Zee Date: Thu Jan 24 18:49:31 2019 -0600 Fixed bug in skx subconfig related to bdd46f9. Details: - Fixed code in the skx subconfiguration that became a bug after committing bdd46f9. Specifically, the bli_cntx_init_skx() function was overwriting default blocksizes for the scomplex and dcomplex microkernels despite the fact that only single and double real microkernels were being registered. This was not a problem prior to bdd46f9 since all microkernels used dynamically-queried (at runtime) register blocksizes for loop bounds. However, post-bdd46f9, this became a bug because the reference ukernels for scomplex and dcomplex were written with their register blocksizes hard-coded as constant loop bounds, which conflicted the the erroneous scomplex and dcomplex values that bli_cntx_init_skx() was setting in the context. The lesson here is that going forward, all subconfigurations must not set any blocksizes for datatypes corresponding to default/reference microkernels. (Note that a blocksize is left unchanged by the bli_cntx_set_blkszs() function if it was set to -1.) commit 180f8e42e167b83a757340ad4bd4a5c7a1d6437b Author: Field G. Van Zee Date: Thu Jan 24 18:01:15 2019 -0600 Fixed undefined behavior trsm ukr bug in bdd46f9. Details: - Fixed a bug that mainfested anytime a configuration was used in which optimized microkernels were registered and the trsm operation (or kernel) was invoked. The bug resulted from the optimized microkernels' register blocksizes conflicting with the hard-coded values--expressed in the form of constant loop bounds--used in the new reference trsm ukernels that were introduced in bdd46f9. The fix was easy: reverting back to the implementation that uses variable-bound loops, which amounted to changing an #if 0 to #if 1 (since I preserved the older implementation in the file alongside the new code based on constant- bound loops). It should be noted that this fix must be permanent, since the trsm kernel code with constant-bound loops can never work with gemm ukernels that use different register blocksizes. commit bdd46f9ee88057d52610161966a11c224e5a026c Author: Field G. Van Zee Date: Thu Jan 24 17:23:18 2019 -0600 Rewrote reference kernels to use #pragma omp simd. Details: - Rewrote level-1v, -1f, and -3 reference kernels in terms of simplified indexing annotated by the #pragma omp simd directive, which a compiler can use to vectorize certain constant-bounded loops. (The new kernels actually use _Pragma("omp simd") since the kernels are defined via templatizing macros.) Modest speedup was observed in most cases using gcc 5.4.0, which may improve with newer versions. Thanks to Devin Matthews for suggesting this via issue #286 and #259. - Updated default blocksizes defined in ref_kernels/bli_cntx_ref.c to be 4x16, 4x8, 4x8, and 4x4 for single, double, scomplex and dcomplex, respectively, with a default row preference for the gemm ukernel. Also updated axpyf, dotxf, and dotxaxpyf fusing factors to 8, 6, and 4, respectively, for all datatypes. - Modified configure to verify that -fopenmp-simd is a valid compiler option (via a new detect/omp_simd/omp_simd_detect.c file). - Added a new header in which prefetch macros are defined according to which compiler is detected (via macros such as __GNUC__). These prefetch macros are not yet employed anywhere, though. - Updated the year in copyrights of template license headers in build/templates and removed AMD as a default copyright holder. commit 63de2b0090829677755eb5cdb27e73bc738da32d Author: Field G. Van Zee Date: Wed Jan 23 12:16:27 2019 -0600 Prevent redef of ftnlen in blastest f2c_types.h. Details: - Guard typedef of ftnlen in f2c_types.h with a #ifndef HAVE_BLIS_H directive to prevent the redefinition of that type. Thanks to Jeff Diamond for reporting this compiler warning (and apologies for the delay in committing a fix). commit eec2e183a7b7d67702dbd1f39c153f38148b2446 Author: Field G. Van Zee Date: Mon Jan 21 12:12:18 2019 -0600 Added escaping to '/' in os_name in configure. Details: - Add os_name to the list of variables into which the '/' character is escaped. This is meant to address (or at least make progress toward addressing) #293. Thanks to Isuru Fernando for spotting this as the potential fix, and also thanks to M. Zhou for the original report. commit adf5c17f0839fdbc1f4a1780f637928b1e78e389 Author: Field G. Van Zee Date: Fri Jan 18 15:14:45 2019 -0600 Formally registered thunderx2 subconfiguration. Details: - Added a separate subconfiguration for thunderx2, which now uses different optimization flags than cortexa57/cortexa53. commit 094cfdf7df6c2764c25fcbfce686ba29b933942c Author: M. Zhou <5723047+cdluminate@users.noreply.github.com> Date: Fri Jan 18 18:46:13 2019 +0000 Port BLIS to GNU Hurd OS. (#294) Prevent blis.h from misidentifying Hurd as OSX. commit 5d7d616e8e591c2f3c7c2d73220eb27ea484f9c9 Author: Field G. Van Zee Date: Tue Jan 15 20:52:51 2019 -0600 README.md update re: mixeddt TOMS paper. commit 58c7fb4788177487f73a3964b7a910fe4dc75941 Author: Field G. Van Zee Date: Tue Jan 8 17:00:27 2019 -0600 Added more matlab scripts for mixeddt paper. Details: - Added a variant set of matlab scripts geared to producing plots that reflect performance data gathered with and without extra memory optimizations enabled. These scripts reside (for now) in test/mixeddt/matlab/wawoxmem. commit 34286eb914b48b56cdda4dfce192608b9f86d053 Author: Field G. Van Zee Date: Tue Jan 8 11:41:20 2019 -0600 Minor update to docs/HardwareSupport.md. commit 108b04dc5b1b1288db95f24088d1e40407d7bc88 Author: Field G. Van Zee Date: Mon Jan 7 20:16:31 2019 -0600 Regenerated symbols in build/libblis-symbols.def. Details: - Reran ./build/regen-symbols.sh after running 'configure --enable-cblas auto' to reflect removal of bli_malloc_pool() and bli_free_pool(). commit 706cbd9d5622f4690e6332a89cf41ab5c8771899 Author: Field G. Van Zee Date: Mon Jan 7 18:28:19 2019 -0600 Minor tweaks/cleanups to bli_malloc.c, _apool.c. Details: - Removed malloc_ft and free_ft function pointer arguments from the interface to bli_apool_init() after deciding that there is no need to specify the malloc()/free() for blocks within the apool. (The apool blocks are actually just array_t structs.) Instead, we simply call bli_malloc_intl()/_free_intl() directly. This has the added benefit of allowing additional output when memory tracing is enabled via --enable-mem-tracing. Also made corresponding changes elsewhere in the apool API. - Changed the inner pools (elements of the array_t within the apool_t) to use BLIS_MALLOC_POOL and BLIS_FREE_POOL instead of BLIS_MALLOC_INTL and BLIS_FREE_INTL. - Disabled definitions of bli_malloc_pool() and bli_free_pool() since there are no longer any consumers of these functions. - Very minor comment / printf() updates. commit 579145039d945adbcad1177b1d53fb2d3f2e6573 Author: Minh Quan Ho <1337056+hominhquan@users.noreply.github.com> Date: Mon Jan 7 23:00:15 2019 +0100 Initialize error messages at compile time (#289) * Initialize error messages at compile time - Assigning strings directly to the bli_error_string array, instead of snprintf() at execution-time. * Retired bli_error_init(), _finalize(). Details: - Removed functions obviated by changes in 80e8dc6: bli_error_init(), bli_error_finalize(), and bli_error_init_msgs(), as well as calls to the former two in bli_init.c. * Regenerated symbols in build/libblis-symbols.def. Details: - Reran ./build/regen-symbols.sh after running 'configure --enable-cblas auto'. commit aafbca086e36b6727d7be67e21fef5bd9ff7bfd9 Author: Field G. Van Zee Date: Mon Jan 7 12:38:21 2019 -0600 Updated external package language in README.md. Details: - Updated/added comments about Fedora, OpenSUSE, and GNU Guix under the newly-renamed "External GNU/Linux packages" section. Thanks to Dave Love for providing these revisions. commit daacfe68404c9cc8078e5e7ba49a8c7d93e8cda3 Author: Field G. Van Zee Date: Mon Jan 7 12:12:47 2019 -0600 Allow running configure with python 3.4. Details: - Relax version blacklisting of python3 to allow 3.4 or later instead of 3.5 or later. Thanks to Dave Love for pointing out that 3.4 was sufficient for the purpose of BLIS's build system. (It should be noted that we're not sure which, if any, python3 versions prior to 3.4 are insufficient, and that the only thing stopping us from determining this is the fact that these earlier versions of python3 are not readily available for us to test with.) - Updated docs/BuildSystem.md to be explicit about current python2 vs python3 version requirements. commit cdbf16aa93234e0d6a80f0d0e385ec81e7b75465 Author: prangana Date: Fri Jan 4 15:59:21 2019 +0530 Update version 1.3 Change-Id: I32a7d24af860e87a60396614075236afb65a28a9 commit cf9c1150515b8e9cc4f12e0d4787b3471b12ba4a Author: kdevraje Date: Thu Jan 3 09:51:46 2019 +0530 This commit adds a macro, which is to be enabled when BLIS is working on single instance mode Change-Id: I7f3fd654b78e64c4e6e24e9f0e245b1a30c492b0 commit ad8d9adb09a7dd267bbdeb2bd1fbbf9daf64ee76 Author: Field G. Van Zee Date: Thu Jan 3 16:08:24 2019 -0600 README.md, CREDITS update. Details: - Added "What's New" and "What People Are Saying About BLIS" sections to README.md. - Added missing github handles to various individuals' entries in the CREDITS file. commit 7052fca5aef430241278b67d24cef6fe33106904 Author: Field G. Van Zee Date: Wed Jan 2 13:48:40 2019 -0600 Apply f272c289 to bli_fmalloc_noalign(). Details: - Perform the same check for NULL return values and error message output in bli_fmalloc_noalign() as is performed by bli_fmalloc_align(). (This change was intended for f272c289.) commit 528e3ad16a42311a852a8376101959b4ccd801a5 Merge: 3126c52e f272c289 Author: Field G. Van Zee Date: Wed Jan 2 13:39:19 2019 -0600 Merge branch 'amd' commit 3126c52ea795ffb7d30b16b7f7ccc2a288a6158d Merge: 61441b24 8091998b Author: Field G. Van Zee Date: Wed Jan 2 13:37:37 2019 -0600 Merge branch 'amd' commit f272c2899a6764eedbe05cea874ee3bd258dbff3 Author: Field G. Van Zee Date: Wed Jan 2 12:34:15 2019 -0600 Add error message to malloc() check for NULL. Details: - Output an error message if and when the malloc()-equivalent called by bli_fmalloc_align() ever returns NULL. Everything was already in place for this to happen, including the error return code, the error string sprintf(), the error checking function bli_check_valid_malloc_buf() definition, and its prototype. Thanks to Minh Quan Ho for pointing out the missing error message. - Increased the default block_ptrs_len for each inner pool stored in the small block allocator from 10 to 25. Under normal execution, each thread uses only 21 blocks, so this change will prevent the sba from needing to resize the block_ptrs array of any given inner pool as threads initially populate the pool with small blocks upon first execution of a level-3 operation. - Nix stray newline echo in configure. commit eb97f778a1e13ee8d3b3aade05e479c4dfcfa7c0 Author: Field G. Van Zee Date: Tue Dec 25 20:17:09 2018 -0600 Added missing AMD copyrights to previous commit. Details: - Forgot to add AMD copyrights to several touched files that did not already have them in 2f31743. commit 2f3174330fb29164097d664b7c84e05c7ced7d95 Author: Field G. Van Zee Date: Tue Dec 25 19:35:01 2018 -0600 Implemented a pool-based small block allocator. Details: - Implemented a sophisticated data structure and set of APIs that track the small blocks of memory (around 80-100 bytes each) used when creating nodes for control and thread trees (cntl_t and thrinfo_t) as well as thread communicators (thrcomm_t). The purpose of the small block allocator, or sba, is to allow the library to transition into a runtime state in which it does not perform any calls to malloc() or free() during normal execution of level-3 operations, regardless of the threading environment (potentially multiple application threads as well as multiple BLIS threads). The functionality relies on a new data structure, apool_t, which is (roughly speaking) a pool of arrays, where each array element is a pool of small blocks. The outer pool, which is protected by a mutex, provides separate arrays for each application thread while the arrays each handle multiple BLIS threads for any given application thread. The design minimizes the potential for lock contention, as only concurrent application threads would need to fight for the apool_t lock, and only if they happen to begin their level-3 operations at precisely the same time. Thanks to Kiran Varaganti and AMD for requesting this feature. - Added a configure option to disable the sba pools, which are enabled by default; renamed the --[dis|en]able-packbuf-pools option to --[dis|en]able-pba-pools; and rewrote the --help text associated with this new option and consolidated it with the --help text for the option associated with the sba (--[dis|en]able-sba-pools). - Moved the membrk field from the cntx_t to the rntm_t. We now pass in a rntm_t* to the bli_membrk_acquire() and _release() APIs, just as we do for bli_sba_acquire() and _release(). - Replaced all calls to bli_malloc_intl() and bli_free_intl() that are used for small blocks with calls to bli_sba_acquire(), which takes a rntm (in addition to the bytes requested), and bli_sba_release(). These latter two functions reduce to the former two when the sba pools are disabled at configure-time. - Added rntm_t* arguments to various cntl_t and thrinfo_t functions, as required by the new usage of bli_sba_acquire() and _release(). - Moved the freeing of "old" blocks (those allocated prior to a change in the block_size) from bli_membrk_acquire_m() to the implementation of the pool_t checkout function. - Miscellaneous improvements to the pool_t API. - Added a block_size field to the pblk_t. - Harmonized the way that the trsm_ukr testsuite module performs packing relative to that of gemmtrsm_ukr, in part to avoid the need to create a packm control tree node, which now requires a rntm_t that has been initialized with an sba and membrk. - Re-enable explicit call bli_finalize() in testsuite so that users who run the testsuite with memory tracing enabled can check for memory leaks. - Manually imported the compact/minor changes from 61441b24 that cause the rntm to be copied locally when it is passed in via one of the expert APIs. - Reordered parameters to various bli_thrcomm_*() functions so that the thrcomm_t* to the comm being modified is last, not first. - Added more descriptive tracing for allocating/freeing small blocks and formalized via a new configure option: --[dis|en]able-mem-tracing. - Moved some unused scalm code and headers into frame/1m/other. - Whitespace changes to bli_pthread.c. - Regenerated build/libblis-symbols.def. commit 61441b24f3244a4b202c29611a4899dd5c51d3a1 Author: Field G. Van Zee Date: Thu Dec 20 19:38:11 2018 -0600 Make local copy of user's rntm_t in level-3 ops. Details: - In the case that the caller passes in a non-NULL rntm_t pointer into one of the expert APIs for a level-3 operation (e.g. bli_gemm_ex()), make a local copy of the rntm_t and use the address of that local copy in all subsequent execution (which may change the contents of the rntm_t). This prevents a potentially confusing situation whereby a user-initialized rntm_t is used once (in, say, gemm), and then found by the user to be in a different state before it is used a second time. commit e809b5d2f1023b4249969e2f516291c9a3a00b80 Merge: 76016691 0476f706 Author: Field G. Van Zee Date: Thu Dec 20 16:27:26 2018 -0600 Merge branch 'master' into amd commit 1f4eeee5175a8fc9ac312847c796ce6db5fe75b9 Author: sraut Date: Wed Dec 19 21:21:10 2018 +0530 Fixed BLAS test failures of small matrix SYRK for single and double precision. Details: - SYRK for small matrix was implemented by reusing small GEMM routine. This was resulting in output written to the full C matrix, and C being symmetric the lower and upper triangles of C matrix contained same results. BLAS SYRK API spec demands either lower or upper triangle of C matrix to be written with results. So, this was resulting in BLAS test failures, even though testsuite of BLIS was passing small SYRK operation. - To fix BLAS test failures of small matrix SYRK, separate kernel routines are implemented for small SYRK for both single and double precision. The newly added small SYRK routines are in file kernels/zen/3/bli_syrk_small.c. Now the intermediate results of matrix C are written to a scratch buffer. Final results are written from scratch buffer to matrix C using SIMD copy to either lower or upper traingle part of matrix C. - Source and header files frame/3/syrk/bli_syrk_front.c and frame/3/syrk/bli_syrk_front.h are changed to invoke new small SYRK routines. Change-Id: I9cfb1116c93d150aefac673fca033952ecac97cb commit 6d267375c3a0543f20604d74cc678ad91db3b6f1 Author: sraut Date: Wed Dec 19 14:22:21 2018 +0530 This commit improves the performance of multi-instance DGEMM when these multiple threads are binded to a CCX. Multi-Instance: Each thread runs a sequential DGEMM. Change-Id: I306920c8061b6dad61efac1dae68727f4ac27df6 commit 0476f706b93e83f6b74a3d7b7e6e9cc9a1a52c3b Author: Field G. Van Zee Date: Tue Dec 18 14:56:20 2018 -0600 CHANGELOG update (0.5.1) commit e0408c3ca3d53bc8e6fedac46ea42c86e06c922d Author: Field G. Van Zee Date: Tue Dec 18 14:56:16 2018 -0600 Version file update (0.5.1) commit 3ab231afc9f69d14493908c53c85a84c5fba58aa Author: Field G. Van Zee Date: Tue Dec 18 14:53:37 2018 -0600 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. commit d1aa87164e1e82347d62aa98793963c5265ef7e7 Author: Field G. Van Zee Date: Tue Dec 18 14:52:40 2018 -0600 README.md update (External packages section). Details: - Updated External packages section in anticipation of introducing BLIS into Debian package universe. Thanks to M. Zhou for sponsoring BLIS in Debian. commit 7bf901e9265a1acd78e44c06f7178c8152c7e267 Author: sraut Date: Tue Dec 18 14:39:16 2018 +0530 Fix on EPYC machine for multi instance performance issue, Issue: For the default values of mc, kc and nc with multi instance mode the performance across the cores dip drastically. Fix: After experimentation found different set of values (mc, kc and nc) which fits in the cache size, and performance across the remains same across all the cores. Change-Id: I98265e3b7e61cd7602a0cc5596240e86c08c03fe commit d2b2a0819a2fccad9165bc48c0e172d79a87542c Author: Field G. Van Zee Date: Mon Dec 17 19:26:35 2018 -0600 Removed stray sections from Multithreading.md. Details: - Removed unintended section headers from before table of contents. commit 93d56319f2953cf0e9df1ff2cda90b8e41351b2c Author: Field G. Van Zee Date: Mon Dec 17 19:17:30 2018 -0600 Added missing bli_init_once() in bli_thread API. Details: - Fixed an issue with specifying threading globally at runtime via bli_thread_set_num_threads() (the automatic way) or via bli_thread_set_ways() (the manual way), with bli_thread_init_rntm() also affected. These functions were not calling bli_init_once() prior to acting, and therefore their effects on the global rntm_t structure were being wiped out by the eventual call to bli_init_once(), by some other BLIS function. Thanks to Ali Emre Gülcü for reporting the behavior associated with this bug. - Added additional content to docs/Multithreading.md covering topics of choosing between OpenMP and pthreads, and specifying affinity via OpenMP. - CREDITS file update. commit 76016691e2c514fcb59f940c092475eda968daa2 Author: Field G. Van Zee Date: Thu Dec 13 17:23:09 2018 -0600 Improvements to bli_pool; malloc()/free() tracing. Details: - Added malloc_ft and free_ft fields to pool_t, which are provided when the pool is initialized, to allow bli_pool_alloc_block() and bli_pool_free_block() to call bli_fmalloc_align()/bli_ffree_align() with arbitrary align_size values (according to how the pool_t was initialized). - Added a block_ptrs_len argument to bli_pool_init(), which allows the caller to specify an initial length for the block_ptrs array, which previously suffered the cost of being reallocated, copied, and freed each time a new block was added to the pool. - Consolidated the "buf_sys" and "buf_align" pointer fields in pblk_t into a single "buf" field. Consolidated the bli_pblk API accordingly and also updated the bli_mem API implementation. This was done because I'd previously already implemented opaque alignment via bli_malloc_align(), which allocates extra space and stores the original pointer returned by malloc() one element before the element whose address is aligned. - Tweaked bli_membrk_acquire_m() and bli_membrk_release() to call bli_fmalloc_align() and bli_ffree_align(), which required adding an align_size field to the membrk_t struct. - Pass the pack schemas directly into bli_l3_cntl_create_if() rather than transmit them via objects for A and B. - Simplified bli_l3_cntl_free_if() and renamed to bli_l3_cntl_free(). The function had not been conditionally freeing control trees for quite some time. Also, removed obj_t* parameters since they aren't needed anymore (or never were). - Spun-off OpenMP nesting code in bli_l3_thread_decorator() to a separate function, bli_l3_thread_decorator_thread_check(). - Renamed: bli_malloc_align() -> bli_fmalloc_align() bli_free_align() -> bli_ffree_align() bli_malloc_noalign() -> bli_fmalloc_noalign() bli_free_noalign() -> bli_ffree_noalign() The 'f' is for "function" since they each take a malloc_ft or free_ft function pointer argument. - Inserted various printf() calls for the purposes of tracing memory allocation and freeing, guarded by cpp macro ENABLE_MEM_DEBUG, which, for now, is intended to be a "hidden" feature rather than one hooked up to a configure-time option. - Defined bli_rntm_equals(), which compares two rntm_t for equality. (There are no use cases for this function yet, but there may be soon.) - Whitespace changes to function parameter lists in bli_pool.c, .h. commit f808d829c58dc4194cc3ebc3825fbdde12cd3f93 Author: Field G. Van Zee Date: Wed Dec 12 15:22:59 2018 -0600 Handle edge cases, zero-filling in packm kernels. Details: - Updated the API and semantics of packm kernels such that they must now handle edge cases, meaning that a c-by-k packm kernel must be able to pack edge cases that are fewer than c rows/columns and be able to zero-fill the remaining elements. They must also be able to zero-fill the equivalent region when copying fewer than k columns/rows (which is needed by trsm). The new packm kernel API is generally: void packm_kernel ( conj_t conja, dim_t cdim, dim_t n, dim_t n_max, ctype* restrict kappa, ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict p, inc_t ldp, cntx_t* restrict cntx ); where cdim and n are the dimensions (short and long, respectively) of the submatrix being copied from the source matrix A, and n_max is the "full" long dimension (corresponding to the k dimension in gemm) of the micropanel. The "full" short dimension (corresponding to the register blocksize MR or NR) is not part of the API because it is known intrinsically by the packm kernel implementation. Thanks to Devin Matthews for prompting us to make this change (#282). - Updated all reference packm kernels in ref_kernels/1m according to above changes, as well as all optimized packm kernels (which only consisted of those for knl). - Bumped the major soname version number in 'so_version' to 2. At first I was considering leaving it unchanged, but I couldn't escape the reality that the packm kernel API is much closer to an expert API than it is some obscure helper function interface within the framework that nobody would ever notice. - Removed reference packm kernels for mr/nr = 30. The only sub-config that would have been using those kernels is knc, which is likely no longer being used by very many people (if any). (This also mostly offset the larger object code footprint incurred by moving the edge- case handling into the individual packm kernels.) - Fixed an obscure race condition for 3mh and 4mh induced methods in which those implementations were modifying the contexts stored in the gks rather than a local copy. - Fixed a minor bug in the testsuite that prevented non-1m-based induced method implementations of trsm from executing. commit 02ec0be3ba0b0d6b4186386ae140906a96de919b Merge: e275def3 c534da62 Author: Field G. Van Zee Date: Wed Dec 5 19:33:53 2018 -0600 Merge branch 'master' into amd commit c534da62c0015f91391983da5376c9e091378010 Author: Field G. Van Zee Date: Wed Dec 5 15:51:05 2018 -0600 Disabled ARM configuration families in registry. Details: - Disabled (commented out) the arm32 and arm64 configuration families in the config_registry file. Having a configuration family registered only makes sense if BLIS is currently outfitted with runtime hardware detection logic to choose the appropriate sub-configuration. That logic is currently missing for ARM architectures, and thus having the ARM configuration families in the configuration registry only serves to confuse people. Thanks to Devangi Parikh for suggesting this change. commit 6885051a164628904fad0d8a3b39c82f9a7b193c Author: Field G. Van Zee Date: Wed Dec 5 14:45:39 2018 -0600 Generalizations/cleanup to mixeddt matlab scripts. Details: - Parameterized, reorganized, and added comments to matlab scripts in test/mixeddt/matlab. - Reordered some lines of code and added comments to plot_l3_perf.m in test/3m4m/matlab. commit cbdb0566bf3201a495bbdcb8cb50342fa0098649 Author: Field G. Van Zee Date: Wed Dec 5 20:06:32 2018 +0000 Updates to 3m4m, mixeddt test driver files. Details: - Updated 3m4m and mixeddt Makefiles and runme.sh scripts, mostly to port recent changes to the former to the latter. - Disabled (for now) code in 3m4m/test_*.c files that disables all induced methods except for the one that is requested from the Makefile via the IND macro. This is done because usually, we want to test whatever method is enabled automatically for complex datatypes. (That is, when native complex microkernels are missing, we usually want to test performance of 1m.) commit 0645f239fbdf37ee9d2096ee3bb0e76b3302cfff Author: Field G. Van Zee Date: Tue Dec 4 14:31:06 2018 -0600 Remove UT-Austin from copyright headers' clause 3. Details: - Removed explicit reference to The University of Texas at Austin in the third clause of the license comment blocks of all relevant files and replaced it with a more all-encompassing "copyright holder(s)". - Removed duplicate words ("derived") from a few kernels' license comment blocks. - Homogenized license comment block in kernels/zen/3/bli_gemm_small.c with format of all other comment blocks. commit 9b688a2d69dd420f4d2582827c5ac87e422cd3bc Author: Field G. Van Zee Date: Tue Dec 4 13:30:25 2018 -0600 Refer to color mm algorithm in Multithreading.md. commit 22384fd2b749aa8cfdfad1084ce5e7dbd4ad2d64 Author: Field G. Van Zee Date: Tue Dec 4 13:09:04 2018 -0600 Minor updates to test_gemm.c in test/mixeddt. commit 2ba3b1780cbca58e43a3948d67bd07e637036125 Author: Field G. Van Zee Date: Mon Dec 3 19:40:39 2018 -0600 Removed symbols from libblis-symbols.def. Details: - Removed bli_gemm_md_front() and bli_gemm_md_zgemm() symbols from build/libblis-symbols.def, which will hopefully appease AppVeyor. commit dcb38c4e59c3395c258799e69bfe2104c578c528 Merge: dc184095 375eb30b Author: Field G. Van Zee Date: Mon Dec 3 18:06:19 2018 -0600 Merge branch 'dev' commit 375eb30b0a63ac06a363a5f75f283584258db48b Author: Field G. Van Zee Date: Mon Dec 3 17:49:52 2018 -0600 Added mixed-precision support to 1m method. Details: - Lifted the constraint that 1m only be used when all operands' storage datatypes (along with the computation datatype) are equal. Now, 1m may be used as long as all operands are stored in the complex domain. This change largely consisted of adding the ability to pack to 1e and 1r formats from one precision to another. It also required adding logic for handling complex values of alpha to bli_packm_blk_var1_md() (similar to the logic in bli_packm_blk_var1()). - Fixed a bug in several virtual microkernels (bli_gemm_md_c2r_ref.c, bli_gemm1m_ref.c, and bli_gemmtrsm1m_ref.c) that resulted in the wrong ukernel output preference field being read. Previously, the preference for the native complex ukernel was being read instead of the pref for the native real domain ukernel. This bug would not manifest if the preference for the native complex ukernel happened to be equal to that of the native real ukernel. - Added support for testing mixed-precision 1m execution via the gemm module of the testsuite. - Tweaked/simplified bli_gemm_front() and bli_gemm_md.c so that pack schemas are always read from the context, rather than trying to sometimes embed them directly to the A and B objects. (They are still embedded, but now uniformly only after reading the schemas from the context.) - Redefined cpp macro bli_l3_ind_recast_1m_params() as a static function and renamed to bli_gemm_ind_recast_1m_params() (since gemm is the only consumer). - Added 1m optimization logic (via bli_gemm_ind_recast_1m_params()) to bli_gemm_ker_var2_md(). - Added explicit handling for beta == 1 and beta == 0 in the reference gemm1m virtual microkernel in ref_kernels/ind/bli_gemm1m_ref.c. - Rewrote various level-0 macro defs, including axpyris, axpbyris, scal2ris, and xpbyris (and their conjugating counterparts) to explicitly support three operand types and updated invocations to xpbyris in bli_gemmtrsm1m_ref.c. - Query and use the storage datatype of the packed object instead of the storage datatype of the source object in bli_packm_blk_var1(). - Relocated and renamed frame/ind/misc/bli_l3_ind_opt.h to frame/3/gemm/ind/bli_gemm_ind_opt.h. - Various whitespace/comment updates. commit e275def30ac41cadce296560fa67282704f20a02 Merge: 8091998b dc184095 Author: Field G. Van Zee Date: Fri Nov 30 15:39:50 2018 -0600 Merge branch 'master' into amd commit dc18409551f341125169fe8d4d43ac45e81bdf28 Author: Field G. Van Zee Date: Wed Nov 28 11:58:40 2018 -0600 CREDITS file update. commit ee4d2712963816f84d7e3fdd39d93424e1aaf63d Merge: e81c4b56 3d7e8bc3 Author: Field G. Van Zee Date: Wed Nov 28 11:52:57 2018 -0600 Merge pull request #287 from SuperFluffy/fix_configuration_links Fix configuration links commit 3d7e8bc3b8e77693152138e75676f71573e5e6cd Author: Richard Janis Goldschmidt Date: Wed Nov 28 15:56:37 2018 +0100 Fix configuration links commit 6a4885f8be9ecd81423ebf2eb6da75d7981c979b Merge: 1d8aae22 e81c4b56 Author: Field G. Van Zee Date: Tue Nov 27 13:22:59 2018 -0600 Merge branch 'master' into dev commit e81c4b56660b25a39f8fdc09fbe07459c5bd8e8e Merge: 757043ea cfbdb58d Author: Field G. Van Zee Date: Wed Nov 21 17:00:49 2018 -0600 Merge pull request #285 from isuruf/pthread Move LDFLAGS to the end commit cfbdb58de2e44f2e3a3d8b14fceece7aef4b3006 Author: Isuru Fernando Date: Wed Nov 21 14:23:39 2018 -0600 Move LDFLAGS to the end Otherwise the linker will drop flags like -lpthread commit 757043eae8630c0a76e9bb04f2cb0bd72439a86a Merge: e769bf46 7af8fa01 Author: Field G. Van Zee Date: Wed Nov 21 13:07:26 2018 -0600 Merge pull request #283 from isuruf/patch-3 Fix MinGW and Cygwin build failures commit 7af8fa01373b7bb30fa3b1fd110fd201c87ea225 Author: Isuru Fernando Date: Wed Nov 21 02:10:05 2018 -0600 Fix blis dll path commit 2acd8dcd23805203a6821358c5e3e09d521fecdf Author: Isuru Fernando Date: Wed Nov 21 02:02:18 2018 -0600 Fix install path of dll.a commit b7b0ad22b151e89e2a6c7782cf4d8d47b4e60734 Author: Isuru Fernando Date: Wed Nov 21 01:54:44 2018 -0600 Test mingw commit bafe521ed0012b7b8814404b78a6c576d8386370 Author: Isuru Fernando Date: Wed Nov 21 01:54:36 2018 -0600 Fixes for mingw commit be831879bd03edcddff8a345161f749ad92215af Author: Isuru Fernando Date: Wed Nov 21 01:39:32 2018 -0600 test gcc shared commit f6b924648c79c4b1c3d3c7fbf85372680aff8362 Author: Isuru Fernando Date: Wed Nov 21 01:39:19 2018 -0600 Don't use .def for gcc commit ce6e4eae6d5e977e6f699acc9cf239be8ac53771 Author: Isuru Fernando Date: Wed Nov 21 01:34:56 2018 -0600 test no threading commit c9169b4685bfe81bc562cf9128b35a6a9884799b Author: Isuru Fernando Date: Wed Nov 21 01:17:36 2018 -0600 Add mingw64 path commit 0f753090eaf4264b743a49ce15de97514bcbe112 Author: Isuru Fernando Date: Wed Nov 21 01:14:52 2018 -0600 Fix PATH commit d424470b1f2fa8717fa54c0245b21341504665f6 Author: Isuru Fernando Date: Wed Nov 21 01:04:26 2018 -0600 Check openmp and pthreads threading commit c73e7601e58239e2dedec6c9f1b752e949254a42 Author: Isuru Fernando Date: Wed Nov 21 00:50:33 2018 -0600 Revert "enable rdp" This reverts commit 368274bcbd0c9232521d14fa28304f35ced0e6d7. commit 6209b2e6060b89e65f3405c31333af8952dd63c0 Author: Isuru Fernando Date: Wed Nov 21 00:50:22 2018 -0600 Remove conda commit 0b1b344447b8a2fcd635a48f0ce7ce89b2107dc4 Author: Isuru Fernando Date: Wed Nov 21 00:42:39 2018 -0600 Fix make name commit 7a9838983ba8dd32ac9f87712255721542ff561f Author: Isuru Fernando Date: Wed Nov 21 00:35:27 2018 -0600 Use m2w64-make commit 4c1dedd6a90087807f16353a5d0bcaaade35a7a5 Author: Isuru Fernando Date: Wed Nov 21 00:28:20 2018 -0600 No activate on gcc commit 368274bcbd0c9232521d14fa28304f35ced0e6d7 Author: Isuru Fernando Date: Tue Nov 20 23:40:26 2018 -0600 enable rdp commit 707a5e7f9b07f554e1e9289dd0ce3b7dc4fded6e Author: Isuru Fernando Date: Tue Nov 20 23:39:31 2018 -0600 No conda for mingw build commit 65b0565c0ad9162d4474bd84eabde491fa971538 Author: Isuru Fernando Date: Tue Nov 20 23:19:38 2018 -0600 Check MinGW-w64 commit 9ddffba5847080e0d77d9e6059d05dc4b1d89ba5 Author: Isuru Fernando Date: Wed Nov 21 00:23:34 2018 -0600 Fix MinGW build failure Fixes https://github.com/flame/blis/issues/278 commit 1d8aae220bc52ce8e3a8afaa64b57e5d83480bdc Author: Field G. Van Zee Date: Tue Nov 20 18:42:07 2018 -0600 Track internal scalar datatypes. Details: - Added a num_t datatype bitfield to the obj_t in the form of a new info2 field in the obj_t. This change was made primarily so that in the case of mixed-datatype gemm, the alpha scalar would not need to be cast to the storage datatype of B (or A) before then being cast to the computation datatype just before the macrokernel is called. This double-casting regime could result in loss of precision if the storage datatype of B (or A) is less than the computation precision. In practice, it was likely not going to be a big deal since most usage of alpha is for -1.0, 0.0, and 1.0 (or integer multiples thereof), which can all be represented exactly in single or double precision. - The type of objbits_t was changed to uint32_t, so the new format potentially takes up the same space as the previous obj_t definition, assuming no padding inserted by the compiler. Shrinking info to 32 bits and spilling over into a second field was chosen over using the high 32 bits of a single 64-bit objbits_t info field because many of the bitwise operations are performed with enums such as num_t, dom_t, and prec_t, which may take on the type of 32-bit ints. It's easier to just keep all of those bitwise operations in 32 bits than perform a million typecasts throughout bli_type_defs.h and bli_obj_macro_defs.h to ensure that the integers are treated as 64-bit for the purposes of the ANDs, ORs, and bitshifts. - Many comment updates. - Thanks to Devin Matthews and Devangi Parikh for their feedback and involvement during this commit cycle. commit e769bf46b0931d68031af212110484ec98e16908 Author: Field G. Van Zee Date: Tue Nov 20 16:16:53 2018 -0600 Tweak testsuite to issue FAIL for Nan, Inf (#279). Details: - Adjusted the definition for libblis_test_get_string_for_result() in testsuite/src/test_libblis.c so that the "FAIL" string is returned if the computed residual contains either NaN or Inf. Previously, a residual containing NaN would result in the selection of the "PASS" string. Thanks to Devin Matthews for reporting this issue (#279). - Expounded on comment for the macro definitions of bli_isnan() and bli_isinf() in bli_misc_macro_defs.h to make it more obvious why they must remain macros. commit 279deae18fb8b8106161863b46fcb38232314de4 Author: Field G. Van Zee Date: Fri Nov 16 11:34:19 2018 -0600 Added 4x5 matlab plotting scripts to test/3m4m. Details: - Added a new directory, test/3m4m/matlab, containing matlab scripts for plotting 4x5 panels of performance graphs (using the subplot() function) for gemm, hemm, herk, trmm, and trsm across all four floating-point datatypes. I expect to further refine these scripts as time goes on, but their current state constitutes a good start. commit 7b02c726650336c12286c8ba166d1d0fdf7601a8 Author: Field G. Van Zee Date: Wed Nov 14 13:49:55 2018 -0600 CREDITS file update. commit 84dd298a27033945fa2d3b6e5dce1fe625cd2a0a Author: Field G. Van Zee Date: Wed Nov 14 13:47:45 2018 -0600 Patch to fix msys2/Windows build failure (#277). Details: - Expanded cpp guard in frame/include/bli_x86_asm_macros.h to also check __MINGW32__ in addition to _WIN32, __clang__, and __MIC__. Thanks to Isuru Fernando for suggesting this fix, and also to Costas Yamin for originally reporting the issue (#277). commit 8091998b6500e343c2024561c2b1aa73c3bafb0b Merge: 333d8562 7b5ba731 Author: Field G. Van Zee Date: Wed Nov 14 12:36:35 2018 -0600 Merge branch 'master' into amd commit 7b5ba7319b3901ad0e6c6b4fa3c1d96b579efbe9 Merge: ce719f81 52392932 Author: Field G. Van Zee Date: Wed Nov 14 12:32:01 2018 -0600 Merge branch 'dev' of github.com:flame/blis into dev commit 52392932dc1ea3c16220cc4e6978efcb2f5f0616 Author: Field G. Van Zee Date: Tue Nov 13 22:23:38 2018 +0000 Minor fixes to test/3m4m drivers. Details: - Cleanups to Makefile to allow all test drivers to be built for OpenBLAS and MKL in addition to BLIS. - Fixed copy-paste typos in test_hemm in calls to ssymm_() and dsymm_(). - Fixed incorrect types for betap in BLAS cpp macro branch of test_herk.c. commit 4f12e36a0d0e6df146314b4e50e36c5e7a1af3d3 Author: Field G. Van Zee Date: Tue Nov 13 14:23:12 2018 -0600 Fixed number of columns in first output line. Details: - In previous commit, forgot to remove output column corresponding to the k dimension. commit a2e0cdd7debf8109198536d55af05d5631072fb2 Author: Field G. Van Zee Date: Tue Nov 13 14:15:11 2018 -0600 Added hemm test driver to test/3m4m. Details: - Added a new test_hemm.c test driver to test/3m4m, which was modeled after the driver by the similar name in test. Also updated Makefile so that blis-nat-[sm]t would trigger builds for the new driver. commit 0f9b53e84b48d8d73a56cc9889eae3595ca58a78 Author: Field G. Van Zee Date: Tue Nov 13 13:03:15 2018 -0600 Fixed a bug in high-level mixeddt conditional. Details: - Fixed a bug in frame/3/bli_l3_oapi.c in the conditional that divides use of induced method (1m) execution from native execution. The former was intended to only be used in cases where all storage datatypes are complex and the datatype of C is equal to the computation datatype. (If mixed datatypes are detected, native execution would be used.) However, the code in bli_gemm() was erroneously checking the execution datatype instead of the computation datatype, which at that point is guaranteed to be equal to the storage datatype even if the computation datatype contains a different value. Thanks to Devangi Parikh for helping in isolating this bug. commit 333d8562f04eea0676139a10cb80a97f107b45b0 Author: Field G. Van Zee Date: Sun Nov 11 14:28:53 2018 -0600 Added debug output to bli_malloc.c. Details: - Added debug output to bli_malloc.c in order to debug certain kinds of memory behavior in BLIS. The printf() statements are disabled and must be enabled manually. - Whitespace/comment updates in bli_membrk.c. commit ce719f816d1237f5277527d7f61123e77180be54 Author: Field G. Van Zee Date: Sat Nov 10 14:48:43 2018 -0600 More edits to mixeddt matlab scripts. Details: - Renamed scripts in test/mixeddt/matlab: plot_case_all.m -> plot_dom_all.m plot_case_md.m -> plot_dom_case.m plot_all_md.m -> plot_dt_all.m - Added plot_dt_select.m in order to plot select graphs for the main body of the mixeddt paper, and added additional related legend handling in plot_gemm_perf.m. - Added test/mixeddt/matlab/output and a .gitkeep file within in order to force git to recognize the directory. commit bf99e7c14baf45725b698d06ad043b531e3a2763 Author: Field G. Van Zee Date: Thu Nov 8 18:47:17 2018 -0600 Minor updates to test/mixeddt driver. Details: - Cleaned up test/mixeddt Makefile in preparation for gathering new data for mixeddt paper, including renaming implementations to "internal" and "ad-hoc" to match the terminology to be used in the paper. - Added new matlab scripts for generating 8 figures, each covering all mixed-precision cases for each mixed-domain case. - Updated the runme.sh script according to changes to Makefile. - Fixed a minor bug in test_gemm.c that may have given incorrect performance in complex, homogeneous storage datatype cases where the computation precision was equal to the storage precisions. (Examples: zzzd, cccs.) commit 4bbb454bf3c361af9e97bfa394a73d610cd9002a Author: Field G. Van Zee Date: Sat Nov 3 19:11:01 2018 -0500 Testsuite docs update for mixed-datatype gemm. Details: - Updated docs/Testsuite.md to include mention of the new mixed-domain and mixed-precision settings, including descriptions. - Updated docs/MixedDatatypes.md to include a brief section on running the testsuite to exercise mixed-datatype functionality, which mostly amounts to a link to the Testsuite.md document. - Minor verbiage change to testsuite output to correct a misleading label associated with the value returned by the query function bli_info_get_simd_num_registers(). (The function does not return the number of SIMD registers present in the hardware, but rather a maximum assumed value for the purposes of allocating temporary microtile workspace on the function stack.) commit 16401ae922b1285437cf5f6867b2764650a95fb0 Merge: f19c33af 2d403a15 Author: Field G. Van Zee Date: Sat Nov 3 19:09:43 2018 -0500 Merge branch 'dev' commit 2d403a1535380a2ebe2ae2c0f5ac54ba7564fbeb Merge: e90e7f30 4a12979f Author: Field G. Van Zee Date: Thu Nov 1 20:18:53 2018 -0500 Merge pull request #275 from RhysU/patch-1 Spelling in FAQ commit 4a12979f65697ed79ba290efd59f4b994ac9429b Author: Rhys Ulerich Date: Thu Nov 1 20:20:59 2018 -0400 Spelling in FAQ commit f19c33af4cbe6f5705b96fbf2b8799c3c2bd75c3 Author: Field G. Van Zee Date: Fri Oct 26 17:07:15 2018 -0500 Disallow 64b BLAS integers + 32b BLIS integers. Details: - Print an error message from configure if the user attempts to explicitly configure BLIS for simultaneous use of 64-bit integers in the BLAS API with 32-bit integers in the BLIS API. - Added cpp macro conditional to bli_type_defs.h to mandate that BLIS integers be 64 bits if the BLAS integers are 64 bits. This and the above item take care of issue #274. Thanks to Devin Matthews and Jeff Hammond for suggesting these safeguards. - Slight reorganization and relabeling (for clarity) of BLAS/CBLAS sections and BLIS integer size line of the testsuite configuration output. - Very minor edits to docs/MixedDatatypes.md. commit e90e7f309b3f2760a01e8e09a29bf702754fa2b5 Author: Field G. Van Zee Date: Thu Oct 25 14:09:43 2018 -0500 CHANGELOG update (0.5.0) commit be7c57819cfd48adb175d9a480cc9f37928645c1 Author: Field G. Van Zee Date: Thu Oct 25 14:09:40 2018 -0500 Version file update (0.5.0) commit 75da7f2a208ad7d26ed9c6d3e10d08b2a1caf9d6 Author: Field G. Van Zee Date: Thu Oct 25 14:02:41 2018 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. - Updated docs/FAQ.md to reflect recent developments, and other edits. - Minor updates to RELEASING. commit 6fbc456fb3f4401ec951a618990f15a84fdfa236 Author: Field G. Van Zee Date: Thu Oct 25 13:20:25 2018 -0500 Added SALT testing to Travis CI. Details: - Modified .travis.yml to automatically employ the simulation of application-level threading within the testsuite, with supporting changes to common.mk, the top-level Makefile, and travis/do_testsuite.sh. - Added a new pair of input files to testsuite directory with the '.salt' suffix (similar to those with the '.fast' suffix) for testing application-level threading. - Updated docs/BuildSystem.md to document the new make targets 'testblis-salt' and 'checkblis-salt'. commit 0e27963a6770e6b64f3299ad0613d5df45d8b6ae Author: Field G. Van Zee Date: Wed Oct 24 12:16:19 2018 -0500 Add bli_pthread_mutex_trylock(). Details: - Added the missing bli_pthread_mutex_trylock() function and prototype to the non-Windows sections of bli_pthread.c and .h. This function isn't needed by BLIS, but I figured why not make the Windows and non-Windows sections consistent with one another. commit 4b683740c12f83804a51ec610b16ce28607d5c85 Author: Field G. Van Zee Date: Wed Oct 24 11:56:16 2018 -0500 Defined bli_pthread_cond_*() and related defs. Details: - Added function definitions for bli_pthread_cond_*() as well as related types and constants to bli_pthread.c, and corresponding prototypes to bli_pthread.h. commit 4b4f8072b9bb495b3e01d45698b0bad3dac31ba8 Author: Field G. Van Zee Date: Wed Oct 24 11:31:46 2018 -0500 Define bli_pthreads barrier types on OS X. Details: - Fully define bli_pthreads barrier-related types on OS X. Only typedef those types in terms of pthreads types on non-Windows, non-Apple OSes (i.e. Linux). commit ad98790dcef6bd9aab7f13d615b987b5daa58757 Author: Field G. Van Zee Date: Tue Oct 23 20:35:05 2018 -0500 Fix names of Windows pthread initializer macros. Details: - Renamed the PTHREAD_ initializer macros in the Windows cpp case to use BLIS_ prefixes to match their non-Windows counterparts. commit 06c23954e6b17219a50c3d37821544a46defaf89 Author: Field G. Van Zee Date: Tue Oct 23 19:16:54 2018 -0500 Defined unified bli_pthreads_*() API for all OSes. Details: - Expanded the bli_pthread_*() -> pthread_*() wrappers in frame/thread/bli_pthread.c to include cases for Windows taken from frame/base/bli_pthread_wrap.c. Now, bli_thread_*() is always defined and always used by BLIS and the BLIS testsuite (in lieu of calling pthreads directly, as before). The implementation used in this new API depends on whether we are building for Windows, and to a lesser extent, whether we are building on OS X. For the core API, Windows uses Windows threads, non-Windows (Linux, OS X) uses pthreads. OS X and Windows get barriers implemented in terms of other bli_pthread_*() functions, and Linux gets barriers implemented in terms of pthread_barrier*(). This commit addresses issue #273. - Fixed a bug in the Linux definition of bli_pthread_mutex_unlock(), which was erroneously calling pthread_mutex_lock(). - Minor changes to configure so that the auto-detection executable can be built given the above changes (most notably, turning on POSIX extensions via -D_GNU_SOURCE). - Removed temporary play-test code for shiftd that accidentally got committed into test/3m4m/test_gemm.c. commit 0ae9585da1e3db1cf8034d4b16305a5883beb0d3 Author: pradeeptrgit Date: Tue Oct 23 09:36:23 2018 +0530 Update version number to 1.2 Change-Id: Ibb31f6683cdecca6b218bc2f0c14701d7e92ebf3 commit eac7d267a017d646a2c5b4fa565f4637ebfd9da7 Author: Field G. Van Zee Date: Mon Oct 22 18:10:59 2018 -0500 Unconditionally define bli_l3_thread_entry(). Details: - Define a dummy bli_l3_thread_entry() function when multithreading is disabled altogether, or enabled via OpenMP. This function was originally necessary when multithreading is enabled via pthreads. By defining the function no matter the threading options given, it is less likely that an AppVeyor Windows build will complain due to a missing symbol in the DLL. (To be clear: AppVeyor was working fine before, but a problem may have arisen if it were switched to an OpenMP build.) - Removed the prototype for bli_l3_thread_entry() from bli_thrcomm_pthreads.c and placed it in bli_thrcomm.h. - Regenerated the symbols list file build/libblis-symbols.def. commit 4ee986f0a74207f4ca29df077929134725d62b80 Author: Field G. Van Zee Date: Mon Oct 22 14:09:44 2018 -0500 Added mixed-datatype testing to Travis CI (#271). Details: - Modified .travis.yml to automatically test the mixed-datatype support of the gemm operation, with supporting changes to common.mk, the top-level Makefile, and travis/do_testsuite.sh. - Added a new pair of input files to testsuite directory with the '.mixed' suffix (similar to those with the '.fast' suffix) for testing mixed-datatype gemm. - Updated docs/BuildSystem.md to document the new make targets 'testblis-md' and 'checkblis-md'. commit c3c6ebc9c6244053d654a9b0c955acb2fef42ee8 Author: Field G. Van Zee Date: Sun Oct 21 18:48:54 2018 -0500 Fixed thrinfo_t printing for small problems. Details: - Fixed a bug in the code that prints out the communicator and work ids from the various threads' thrinfo_t nodes. This bug manifested when the dimension being parallelized was not large enough such that every thread was assigned actual work (since the minimum amount of work is determined by the register blocksize in the dimension being parallelized). In those cases, the threads that receive no work in that dimension do not finish building their thrinfo_t tree, leaving lower-level nodes non-existent. (The bug itself was usally observed as a segfault when the printing code attempted to dereference all the way down the thrinfo_t tree.) The solution involves explicitly checking each node as it is dereferenced, and if at any time NULL is found, all subsequent communicator and work ids are set to -1. commit 73a222c0d99dcc221be7dea10eaebf844f31f72e Author: Field G. Van Zee Date: Sat Oct 20 14:13:04 2018 -0500 Minor edits to 'configure --help' text. commit 14f3d5e6df183819a0c393b2661ad15df0786544 Author: Field G. Van Zee Date: Fri Oct 19 20:39:35 2018 -0500 Refresh libblis-symbols.def post-merge 090e4f0. commit 090e4f08fc2f429a1b2db77b0a6f8276f892a7ac Merge: c9be5889 0854e880 Author: Field G. Van Zee Date: Fri Oct 19 18:41:10 2018 -0500 Merge branch 'master' into dev commit 0854e880b0848e0c2e3d0644c93c80b0fd13c0dc Merge: 4e38a8d4 343a2715 Author: Field G. Van Zee Date: Fri Oct 19 18:05:00 2018 -0500 Merge pull request #261 from flame/win-pthreads Implement missing pthreads function on Windows commit c9be5889fbe947c64ef75740662e4d63032f4c35 Author: Field G. Van Zee Date: Fri Oct 19 17:42:40 2018 -0500 Added "Known issues" section to Multithreading.md. Details: - Added known issues section to Multithreading.md. - Trivial changes to MixedDatatypes.md, Sandboxes.md. commit 343a2715ebee28d250ee41b914abdcd1dc77c344 Author: Field G. Van Zee Date: Fri Oct 19 16:59:19 2018 -0500 Whitespace changes to configure, bli_pthread_wrap. Details: - Mostly whitespace changes (spaces to tabs) to configure and bli_pthread_wrap.c and .h. commit 3678a1cd518df9447b4b1ea86885eb2ba8abcf6e Merge: 85397cd4 4e38a8d4 Author: Field G. Van Zee Date: Fri Oct 19 16:11:31 2018 -0500 Merge branch 'master' into win-pthreads commit 4e38a8d4eebb18ead74e644fac76a4fde8e7f6c6 Author: Field G. Van Zee Date: Fri Oct 19 15:54:15 2018 -0500 Implemented python version checking in configure. Details: - Added python version checking to configure script. (Recall that python is needed to execute the flatten-headers.py script.) Minimum versions of python needed are currently as follows: python2: 2.7 or later python3: 3.5 or later The standard search order for python interpeters is: python python3 python2 The PYTHON environment variable is also supported and will be checked before the standard search order list. - Updated BuildSystem.md to include: a minimum make version; mention that the C compiler must actually be a C99 compiler; and the caveat that Windows builds do not require pthreads since BLIS can provide an implementation of pthreads internally. commit 85397cd4fa52f6c4c33f4fb715478c55533c680e Author: Field G. Van Zee Date: Fri Oct 19 13:12:43 2018 -0500 Added explanatory comment to bli_pthread.c. Details: - Added a verbose comment to bli_pthread.c that explains why a bli_ wrapper to pthreads APIs is useful. commit 53c07035ef61cc9b8469636d4d8fa5085f37652d Author: Field G. Van Zee Date: Fri Oct 19 12:53:03 2018 -0500 Refresh libblis-symbols.def from bb6df28. Details: - Forgot to regenerate the symbols file after the previous commit (bb6df281) in which shiftd operation was introduced. commit 473ce54f5fbea4860ac0514e7e8b022c1ea03e63 Author: Field G. Van Zee Date: Thu Oct 18 19:03:56 2018 -0500 Added bli_pthread_*() API. Details: - Defined a bli_pthread_*() API so that the testsuite, when being linked against a Windows DLL, will be able to access pthreads functionality without those pthreads functions being explicitly exported by the DLL. Instead, we export the bli_pthread_*() layer, which uses types and functions that are identical to pthreads, but adds a 'bli_' prefix. Only a few basic functions are present in the bli_pthreads_*() API for now. Thanks to Devin Matthews and Isuru Fernando for their help on a related PR (#261) that this commit will hopefully facilitate. - Updated testsuite so that it calls bli_pthread_*() layer instead of pthread_*() functions directly. - Regenerated build/libblis-symbols.def. - Comment updated to build/regen-symbols.sh. commit bb6df2814fcaa2fa62a549379f61be2f8667a598 Author: Field G. Van Zee Date: Thu Oct 18 17:11:39 2018 -0500 Defined a new level-1d operation: shiftd. Details: - Defined a new level-1d operation called 'shiftd', including object and typed APIs. This operation adds a scalar value to every element along an arbitrary diagonal of a matrix. Currently, shiftd is implemented in terms of the addv kernel. (The scalar is passed in as the x vector with an increment of zero.) - Replaced ad-hoc usage of setd and addd (after creating a temporary matrix object) with use of shiftd, which is much more concise, in various test driver files in the testsuite. Similar changes were made to the standalone test drivers and the example code. - Added documentation entries in BLISObjectAPI.md and BLISTypedAPI.md for bli_shiftd() and bli_?shiftd(), respectively. - Added observed object properties to level-1d documentation in BLISObjectAPI.md. commit 53e0a0c9b38e8525c7224e280342ef56328af567 Merge: 1c7247b6 ec676799 Author: Field G. Van Zee Date: Thu Oct 18 14:54:59 2018 -0500 Merge branch 'master' into win-pthreads commit ec67679990660a60362a49406595383672812287 Author: Field G. Van Zee Date: Thu Oct 18 14:27:02 2018 -0500 Refreshed Windows symbol list; added regen script. Details: - Moved windows/build/libblis-symbols.def to build/libblis-symbols.def. Updated link commands in common.mk accordingly. - Added a new script build/regen-symbols.sh that will regenerate the libblis-symbols.def file in its new location after building a haswell-targeted shared library. Thanks to Isuru Fernando for providing the symbol generation command. - Ran the new script to refresh the symbols file. commit fdad54ab8eee4a7efd04ec4afb3e6902eb22e60a Author: Field G. Van Zee Date: Thu Oct 18 12:43:22 2018 -0500 Removed old symbol from libblis-symbols.def. Details: - Removed bli_gemm_ker_var1() from windows/build/libblis-symbols.def since this function is no longer compiled. commit 49d3f9fcbb4a75553439f97c099ea48d85763eea Merge: 779d64dc 3c527256 Author: Field G. Van Zee Date: Wed Oct 17 18:00:40 2018 -0500 Merge branch 'master' into dev commit 3c52725693d0d7726e1c8fb224f9b1ef786db8b9 Author: Field G. Van Zee Date: Wed Oct 17 14:56:22 2018 -0500 Renamed/moved l3 zen ukernels to haswell kernel set. Details: - Renamed the microkernels in kernels/zen/3 to kernels/haswell/3 and then updated the file contents to use the 'haswell' infix. - Updated bli_cntx_init_zen.c and bli_cntx_init_haswell.c according to above function renames. - Moved/updated the corresponding prototypes in bli_kernels_zen.h to bli_kernels_haswell.h. - Updated config_registry according to above changes. - NOTE: This rename reflects the fact that haswell microkernels are specifically written to overcome the floating-point latency for FMA instructions on Intel Haswell-like architectures, which can issue two FMA instructions per cycle. These ukernels happen to work fine on AMD Zen-based architectures. However, Zen only issues one FMA per cycle, which, while halving its floating-point throughput, gives it extra flexibility in the design of its microkernels--namely, mr and nr can be smaller and still overcome the floating-point latency for those single-issue cores. A smaller value of mr and nr allows for a larger value of kc, which may be useful in some situations. In the future, we may write such Zen-specific microkernels to take advantage of this additional flexibility. commit 71c5832d5f5596f25204980803423d08143a4010 Author: Field G. Van Zee Date: Wed Oct 17 14:11:01 2018 -0500 Consolidated slab/rr-explicit level-3 macrokernels. Details: - Consolidated the *sl.c and *rr.c level-3 macrokernels into a single file per sl/rr pair, with those files named as they were before c92762e. The consolidation does not take away the *option* of using slab or round-robin assignment of micropanels to threads; it merely *hides* the choice within the definitions of functions such as bli_thread_range_jrir(), bli_packm_my_iter(), and bli_is_last_iter() rather than expose that choice explicitly in the code. The choice of slab or rr is not always hidden, however; there are some cases involving herk and trmm, for example, that require some part of the computation to use rr unconditionally. (The --thread-part-jrir option controls the partitioning in all other cases.) - Note: Originally, the sl and rr macrokernels were separated out for clarity. However, aside from the additional binary code bloat, I later deemed that clarity not worth the price of maintaining the additional (mostly similar) codes. commit 57eab3a4f0e43099fc2ff189df9fcc0d7801c2cd Author: Field G. Van Zee Date: Wed Oct 17 11:29:20 2018 -0500 CREDITS file update. commit 6722ec21817cbab9d86ee63f00984eb407b5e627 Author: Ye Luo Date: Wed Oct 17 11:26:00 2018 -0500 Fix bgclang compilation on BGQ (#270) * Fix bgq kernels * Support bgq with bgclang commit 1c7247b6d146fc728d7c4240e4e069e33f8f8868 Merge: c1bc5530 6c5a1aaf Author: Devin Matthews Date: Tue Oct 16 14:44:32 2018 -0500 Merge branch 'win-pthreads' of github.com:flame/blis into win-pthreads commit c1bc5530d51bf55b4aa3c35165f6d4452a0fd779 Author: Devin Matthews Date: Tue Oct 16 14:44:10 2018 -0500 Don't call pthread_once in auto-detect. commit b9c61d03f542a2e92551ff0595415bec3076ab25 Merge: 5a1e461f 3612ecac Author: Field G. Van Zee Date: Tue Oct 16 14:39:57 2018 -0500 Merge branch 'nested-omp-patch' commit 5a1e461ffe09ed200ee2fc7aafccf6dd7e8c0080 Author: Field G. Van Zee Date: Tue Oct 16 14:21:45 2018 -0500 Execute flatten-headers.py via $(PYTHON). Details: - Execute build/flatten-headers.py python script via $(PYTHON) in common.mk. This allows distributions that define the current/preferred python interpreter in the PYTHON environment variable to use that interpreter when executing flatten-headers.py. Thanks to Isuru Fernando for this suggestion, and for Dave Love for submitting the initial issue/request. commit 6c5a1aaff540b19672e91501e894ed695aee322b Author: Devin Matthews Date: Tue Oct 16 10:15:59 2018 -0500 Fix type in bli_pthread_wrap.c commit 29e6245816760b1bd4ac738d7d3e11a9d9d13473 Merge: 0b73209f ed657714 Author: Devin Matthews Date: Tue Oct 16 10:12:25 2018 -0500 Merge branch 'master' into win-pthreads commit 0b73209f6b22cc024169146d343627f6999b63d8 Author: Devin Matthews Date: Tue Oct 16 10:02:06 2018 -0500 Add missing argument to WaitForSingleObject and use $is_win in configure to turn off pthreads. commit ed65771482a705f7ed028d822489766327b44e76 Author: Field G. Van Zee Date: Mon Oct 15 17:54:45 2018 -0500 Fixed merge fail on testsuite threading macros. Details: - Applied the following C preprocessor macro renames BLIS_DEFAULT_MR_THREAD_MAX -> BLIS_THREAD_MAX_IR BLIS_DEFAULT_NR_THREAD_MAX -> BLIS_THREAD_MAX_JR BLIS_DEFAULT_M_THREAD_RATIO -> BLIS_THREAD_RATIO_M BLIS_DEFAULT_N_THREAD_RATIO -> BLIS_THREAD_RATIO_N in src/test_libblis.c. This is apparently the result of a failure by git to properly merge the 'master' and 'amd' branches in the previous commit. (The 'master' branch contained a commit, 53a9ab1, in which these same cpp macros were renamed throughout the source distribution. commit dc5fd898af8c74c2e2a75fc647157da0d04dd922 Merge: 667d3929 637c2ce7 Author: Field G. Van Zee Date: Mon Oct 15 17:41:35 2018 -0500 Merge branch 'amd' commit 779d64dc3091dea6b7530283304e52878151d218 Author: Field G. Van Zee Date: Mon Oct 15 17:13:18 2018 -0500 Added entry for xpbym to input.operations.fast. Details: - Forgot to add an entry for the new xpbym operation to input.operations.fast in previous commit. commit 5fec95b99f61761963834f62a9867f797687813c Author: Field G. Van Zee Date: Mon Oct 15 16:37:39 2018 -0500 Implemented mixed-datatype support for gemm. Details: - Implemented support for gemm where A, B, and C may have different storage datatypes, as well as a computational precision (and implied computation domain) that may be different from the storage precision of either A or B. This results in 128 different combinations, all which are implemented within this commit. (For now, the mixed-datatype functionality is only supported via the object API.) If desired, the mixed-datatype support may be disabled at configure-time. - Added a memory-intensive optimization to certain mixed-datatype cases that requires a single m-by-n matrix be allocated (temporarily) per call to gemm. This optimization aims to avoid the overhead involved in repeatedly updating C with general stride, or updating C after a typecast from the computation precision. This memory optimization may be disabled at configure-time (provided that the mixed-datatype support is enabled in the first place). - Added support for testing mixed-datatype combinations to testsuite. The user may test gemm with mixed domains, precisions, both, or neither. - Added a standalone test driver directory for building and running mixed-datatype performance experiments. - Defined a new variation of castm, castnzm, which operates like castm except that imaginary values are not touched when casting a real operand to a complex operand. (By contrast, in these situations castm sets the imaginary components of the destination matrix to zero.) - Defined bli_obj_imag_is_zero() and substituted calls in lieu of all usages of bli_obj_imag_equals() that tested against BLIS_ZERO, and also simplified the implementation of bli_obj_imag_equals(). - Fixed bad behavior from bli_obj_is_real() and bli_obj_is_complex() when given BLIS_CONSTANT objects. - Disabled dt_on_output field in auxinfo_t structure as well as all accessor functions. Also commented out all usage of accessor functions within macrokernels. (Typecasting in the microkernel is still feasible, though probably unrealistic for now given the additional complexity required.) - Use void function pointer type (instead of void*) for storing function pointers in bli_l0_fpa.c. - Added documentation for using gemm with mixed datatypes in docs/MixedDatatypes.md and example code in examples/oapi/11gemm_md.c. - Defined level-1d operation xpbyd and level-1m operation xpbym. - Added xpbym test module to testsuite. - Updated frame/include/bli_x86_asm_macros.h with additional macros (courtsey of Devin Matthews). commit 3612ecac98a9d36c3fcd64154121d420bb69febd Author: Field G. Van Zee Date: Thu Oct 11 15:16:41 2018 -0500 Added comments to nested OpenMP handling code. Details: - Added comments to bli_thrcomm_openmp.c relating to changes made in 6ac0c80 and 1064d79. commit 667d3929ee20e94849b4e25b693b4037b7e3f350 Author: Field G. Van Zee Date: Thu Oct 11 11:47:57 2018 -0500 Added Fortran APIs for some thread functions. Details: - Defined Fortran-77 compatible APIs for bli_thread_set_num_threads() and bli_thread_set_ways(). These wrappers are defined in frame/compat/blis/thread/b77_thread.c. Thanks to Kay Dewhurst for suggesting these new interfaces. - Added missing prototype for bli_thread_set_ways() in bli_thread.h and removed prototypes for non-existent functions bli_thread_set_*_nt(). - CREDITS file update. commit 1064d79711f03a0541b92d8b8b9b7e25e04097a5 Author: Devin Matthews Date: Thu Oct 11 11:14:25 2018 -0500 Adjust rntm_t struct as well. commit 6ac0c805609b85616ddb32e50101c4f9feb25a35 Author: Devin Matthews Date: Thu Oct 11 10:45:07 2018 -0500 Fix OMP nesting problem. Detect when OpenMP uses fewer threads than requested and correct accordingly, so that we don't wait forever for nonexistent threads. Fixes #267. commit 78a6935483409ae277c766406e175772e820b1de Author: sraut Date: Thu Oct 11 10:49:40 2018 +0530 Added comments for the change in syrk small matrix change. Change-Id: I958939e9953323730da49ef07d1b10e578837d82 commit 53a9ab1c85be14dcfd2560f5b16e898e3e258797 Author: Field G. Van Zee Date: Wed Oct 10 15:11:09 2018 -0500 Renamed thread auto-factorization macro constants. Details: - Renamed the following C preprocessor macros whose fallback/default values are specified within frame/include/bli_kernel_macro_defs.h: BLIS_DEFAULT_MR_THREAD_MAX -> BLIS_THREAD_MAX_IR BLIS_DEFAULT_NR_THREAD_MAX -> BLIS_THREAD_MAX_JR BLIS_DEFAULT_M_THREAD_RATIO -> BLIS_THREAD_RATIO_M BLIS_DEFAULT_N_THREAD_RATIO -> BLIS_THREAD_RATIO_N - Renamed the above cpp macro overrides within the knl, skx, and zen sub-configurations, as well as invocations of those macros in bli_rntm.c. - Moved config/zen/bli_kernel.h to an 'old' directory as it is no longer used by any code within BLIS. commit 637c2ce794b0414ba8b25e9a452f7d64f825d63a Author: Field G. Van Zee Date: Tue Oct 9 17:18:04 2018 -0500 Updated column index range for irun.py -q. Details: - Forgot to apply the column index range fix in 10f179f to situations when "quiet" mode (-q) is requested. This commit applies the new column index range modifications to the quiet case. commit e2a59400bdda7ed7ee0ff00edea70c00ed593b6c Author: Field G. Van Zee Date: Tue Oct 9 15:29:48 2018 -0500 Allow trsm_l parallelism in the jc loop. Details: - Previously, trsm was consolidating all ways of parallelism into the jr loop. This was unnecessary and to some degree detrimental on some types of hardware. Now, any parallelism bound for the jc loop will be applied to the jc loop, while all other loops' parallelism is funneled to the jr loop. Thanks to Devangi Parikh for helping investigate this issue and suggesting the fix. - NOTE: This change affects only left-side trsm. However, currently right-side trsm is currently implemented in terms of the left-side case, and thus the change effectively applies to both left and right cases. commit f1dba506c970f14e612580d3c171e7c5ffd0a5fb Author: Field G. Van Zee Date: Mon Oct 8 17:59:41 2018 -0500 Output threading status/params from testsuite. Details: - Updated testsuite to output various parameters related to parallelism in BLIS. These parameters include: - threading status: disabled, openmp, or pthreads; - thread partitioning for jr/ir loops: slab or rr (round-robin); - ways of parallelism from environment variables, and also actual values used by gemm, herk, trmm_l, trmm_r, trsm_l, and trsm_r for square problems (assuming all dimensions are set to 1000); - automatic thread factorization parameters. - Also output the status of two relatively new configure-time options: libmemkind and the sandbox. commit 10f179fb13fc1179921a4ef8efdd2174f01e07da Author: Field G. Van Zee Date: Mon Oct 8 14:36:38 2018 -0500 Updated irun.py to use updated column index range. Details: - Updated the irun.py script so that it updates the matlab column index range (if found) to reflect the additional columns of data that are substituted in. Thanks to Devangi Parikh for recognizing and reporting this issue. commit c244a716c97849dee41f52b5f424116aae1b710b Author: Field G. Van Zee Date: Sun Oct 7 20:59:40 2018 -0500 Added missing -r option to configure --help output. Details: - Added inadvertantly-omitted mention of -r option-equivalent to --thread-part-jrir to the output for 'configure --help'. Also made minor edits to the same text. commit c92762ecdca1eb0b08c8acd583b4739a1e3fbd39 Author: Field G. Van Zee Date: Sun Oct 7 20:30:32 2018 -0500 Added option of slab or rr partitioning in jr/ir. Details: - Updated existing macrokernel function names and definitions to explicitly use slab assignment of micropanels to threads, then created duplicate versions of macrokernels that explicitly use round-robin assignment instead of slab. NOTE: As in ac18949, trsm_r macrokernels were not substantially updated in this commit because they are currently disabled in bli_trsm_front.c. - Updated existing packing function (in blk_packm_blk_var1.c) to explicitly use slab partitioning, and then duplicated for round-robin. - Updated control tree initialization to use the appropriate macrokernel and packm function pointers depending on which method (slab or rr) was enabled at configure-time. - Updated configure script to accept new --thread-part-jrir=[slab|rr] option (-m [slab|rr] for short), which allows the user to explicitly request either slab or round-robin assignment (partitioning) of micropanels to threads. - Updated sandbox/ref99 according to above changes. - Minor updates to build/add-copyright.py. commit 98e01ea04bfe1032e5bd4781043afd84f864a19e Merge: ac18949a 541b8a3b Author: Field G. Van Zee Date: Thu Oct 4 20:44:12 2018 -0500 Merge branch 'master' into amd commit 541b8a3b3e9af4078f5e6fb2f9608d681839952a Author: Field G. Van Zee Date: Thu Oct 4 20:39:06 2018 -0500 Removed 1h short-circuit from bli_clock_min_diff(). Details: - Removed a guard from bli_clock_min_diff() that would return 0 if the time delta was greater than 60 minutes. This was originally intended to disregard extremely large values under the assumption that the user probably didn't intend to run a test that long. However, since it is in bli_clock_min_diff(), it doesn't actually help short-circuit an implementation that is hanging or looping infinitely, since such an implementation would first have to finish before the bli_clock_min_diff() is called. Thanks to Kiran Varaganti for reporting this issue. commit f0c3ef359f7c6c1687fb2671cb35deb346e00597 Author: Kiran V Date: Thu Oct 4 16:32:21 2018 +0530 This is a fix to floating-point exception error for BLIS SGEMM with larger matrix sizes. BUG No: CPUPL-197 fixed by Thangaraj Santanu The bli_clock_min_diff() function in BLIS assumed that if the time taken is greater than 1 hour then the reading must be wrong. However this is not the case in general, while the other checks such as time taken closer to zero or nsec is ofcourse valid. gerrit review: http://git.amd.com:8080/#/c/118694/1/frame/base/bli_clock.c Change-Id: I9dc313d7c5fdc20684f67a516bf3237de3e0694a commit 8bf30eb4735872388b5317883d99b775a344ce25 Author: Devangi N. Parikh Date: Wed Oct 3 22:22:29 2018 -0400 Fixed runme.sh in test/studies/thunderx2 Details: - Fixed the setting of threads for a single core run. commit f6f2456ba2afa8f85f43c7c2c90acc439d61d94f Author: Devangi N. Parikh Date: Wed Oct 3 21:43:46 2018 -0400 Fixed the Makefile in test/studies/thunderx2 Details: - Fixed target for make-all-st and make-all-mt so that the armpl targets are built commit 743a1a6dec1bd3908f0f15513b501c9bd59715b3 Author: Field G. Van Zee Date: Wed Oct 3 14:40:10 2018 -0500 Fixed misleading version query from gcc 7+. Details: - gcc 7 introduced new behavior to the -dumpversion option whereby only the major version component is output. However, as part of this change, gcc 7 also introduced a new option, -dumpfullversion, which is guaranteed to always output the major, minor, and revision numbers. If we are using gcc 7 or later, we re-query the version string with this new option and then re-parse the result so as to avoid misleading output from configure (e.g. using gcc 7.3.0 is reported as 7.7.7). commit de07840ba5672b9d7b2ed2b918974e98c3f249fb Author: Field G. Van Zee Date: Wed Oct 3 13:57:25 2018 -0500 Whitespace, https updates to README.md. Details: - Reformatted to fit all lines within 80 columns, unless a link is too long to fit on a single line. - Changed some links from http to https. commit 80a8b3dd8034ec8bc03d31be3f9c837c3f6fc94b Author: sraut Date: Wed Oct 3 15:30:33 2018 +0530 Review comments incorporated for small TRSM. Change-Id: Ia64b7b2c0375cc501c2cb0be8a1af93111808cd9 commit b8dfd82e0d1afda4ee5436662d63515a59b2dee3 Author: Devin Matthews Date: Tue Oct 2 15:37:12 2018 -0500 Get pthreads via blis.h in the test driver. commit d0c0c20b7bd3ecf914b5910a50f618fb7d7aa355 Author: Devin Matthews Date: Tue Oct 2 15:16:00 2018 -0500 There seems to be a problem with _POSIX_BARRIERS on Travis. commit 0904d9e4df0c8a256ac35c491f14a587ebe9fca2 Author: Devin Matthews Date: Tue Oct 2 15:04:36 2018 -0500 *Always* use Windows primitives instead of pthreads. commit 998317d309934cd7129f8c818ea6e5f07534ebc8 Author: Devin Matthews Date: Tue Oct 2 14:43:24 2018 -0500 Remove pthreads from appveyor build. commit 627d0c5bfd4b7b149803587391c93b164c11ced5 Author: Devin Matthews Date: Tue Oct 2 14:40:55 2018 -0500 Combine the alternative barrier implementation for macOS with the pthread wrapper for Windows. Also implement pthread_{create,join} for Windows. commit 81d2c064a209df7eca7d6103696ca3a137a7f82e Author: Devin Matthews Date: Tue Oct 2 11:46:36 2018 -0500 Add wrapper for basic pthreads functionality (mutex, once) with MSVC. commit d33f130ea621fca1dccb30631f454d237918eb04 Author: Devin Matthews Date: Tue Oct 2 11:45:43 2018 -0500 Some configure changes: 1) Allow environment variables to be set anywhere in the argument list. 2) Allow any environment variable to be set. 3) Allow LIBPHTREAD to be set to null without getting defaulted to -lpthread. commit 9d5f1c4f3bf70c2c0ea84bfa326a0113ae2d176c Author: Field G. Van Zee Date: Mon Oct 1 17:39:26 2018 -0500 Patch to avoid gcc warning in blastest/f2c/open.c. Details: - Use the modulo operator to limit the size of an integer that is given to sprintf(). This avoids a warning in some versions of gcc about the integer potentially overflowing the available space in the string into which the integer is being printed. commit 0c3cd00ba76de607e807f8deb04b1a2ce18ea7a8 Author: Field G. Van Zee Date: Mon Oct 1 16:18:25 2018 -0500 More README.md updates. Details: - Replaced much of "Getting Started" section with a shortened version of the bullet list of documentation currently shown in the github wiki page. Thanks to Devangi Parikh for her feedback in this change. commit 8eaf34bd23b30a1857a50d7142ee9811895f24bf Author: Field G. Van Zee Date: Mon Oct 1 14:29:07 2018 -0500 Very minor README.md update. commit 599090e0eb41b2706fa1231fa7b90096f3281678 Author: Field G. Van Zee Date: Mon Oct 1 14:04:30 2018 -0500 README.md update. Details: - Added language mentioning SHPC group to Introduction. commit ee46fa3efb6e920fa6c3d0b0601007f5de31deb5 Author: sraut Date: Mon Oct 1 16:30:30 2018 +0530 Small TRSM optimization changes :- 1) single precision small trsm kernels for XAt=B case are further optimized for performance. 2) double precision small trsm kernels for AX=B and XAtB cases are implemented. 3) single precision small trsm kernels for AutX=B are implemented in intrinsics to improve the current performance. Change-Id: Ic9d67ae6d8522615257dde018903f049dcffa2cf commit 08045a6c52b6e025652c5b18eb120c0f4e61cf6f Author: sraut Date: Mon Oct 1 15:38:23 2018 +0530 Corrected the fix made for blastest level-3 failure to check m,n,k non-zero condition in bli_gemm_small.c Change-Id: Idaf9f2327c3127b04a2738ae8a058b83d6c57934 commit ac18949a4b9613741b9ea8e5026d8083acef6fe4 Author: Field G. Van Zee Date: Sun Sep 30 18:54:56 2018 -0500 Multithreading optimizations for l3 macrokernels. Details: - Adjusted the method by which micropanels are assigned to threads in the 2nd (jr) and 1st (ir) loops around the microkernel to (mostly) employ contiguous "slab" partitioning rather than interleaved (round robin) partitioning. The new partitioning schemes and related details for specific families of operations are listed below: - gemm: slab partitioning. - herk: slab partitioning for region corresponding to non-triangular region of C; round robin partitioning for triangular region. - trmm: slab partitioning for region corresponding to non-triangular region of B; round robin partitioning for triangular region. (NOTE: This affects both left- and right-side macrokernels: trmm_ll, trmm_lu, trmm_rl, trmm_ru.) - trsm: slab partitioning. (NOTE: This only affects only left-side macrokernels trsm_ll, trsm_lu; right-side macrokernels were not touched.) Also note that the previous macrokernels were preserved inside of the 'other' directory of each operation family directory (e.g. frame/3/gemm/other, frame/3/herk/other, etc). - Updated gemm macrokernel in sandbox/ref99 in light of above changes and fixed a stale function pointer type in blx_gemm_int.c (gemm_voft -> gemm_var_oft). - Added standalone test drivers in test/3m4m for herk, trmm, and trsm and minor changes to test/3m4m/Makefile. - Updated the arguments and definitions of bli_*_get_next_[ab]_upanel() and bli_trmm_?_?r_my_iter() macros defined in bli_l3_thrinfo.h. - Renamed bli_thread_get_range*() APIs to bli_thread_range*(). commit b952ca8feb6f17f71a4512649c2aa72bdee9c8f4 Author: Field G. Van Zee Date: Fri Sep 28 16:12:32 2018 -0500 CREDITS file update. commit 7d96fc437ebaa9dd2d7071865b5df16402fadd64 Author: Field G. Van Zee Date: Fri Sep 28 15:40:45 2018 -0500 Allow slashes ('/') in version tags. Details: - Updated the configure script to allow slashes in version string. This is needed so that downstream maintainers (such as those for Debian) can create local tags such as "upstream/0.4.1". Thanks to M. Zhou for reporting this issue via PR #256 and providing me the information needed to debug the problem. commit 5fdddf6f37c64da093c7f59e3a85214e819ae652 Author: Field G. Van Zee Date: Fri Sep 28 11:25:54 2018 -0500 Removed 'debian' directory. Details: - Removed the top-level 'debian' directory. This directory is apparently no longer needed (issue #257). Thanks to M. Zhou and Nico Schlömer for their contributions. commit 9814cfdf3157ef4726ee604fc895d56e8063d765 Author: Meghana Date: Fri Sep 28 11:02:39 2018 +0530 fixed blastest level-3 failure by adding ((M&N&K) != 0) to check condition in bli_gemm_small.c Change-Id: I85e4a32996ebb880f3c00bd293edc38f74700fe6 commit 86330953b14c180862deef3ccdcc6431259be27b Merge: 7af5283d 807a6548 Author: praveeng Date: Fri Sep 28 10:08:06 2018 +0530 Resolved conflicts and modified bli_trsm_small.c Change-Id: I578d419cff658003e0fdd4c4cdc93145d951ce31 commit 60b2650d7406d266feffe232c2d5692a9e3886d0 Author: Field G. Van Zee Date: Mon Sep 24 15:04:45 2018 -0500 Added statistics-collecting irun.py script. Details: - Added irun.py script to 'build' directory. This irun.py script is a python script for repeatedly invoking a test driver executable, such as those found in test/3m4m, and replace the performance output column with four columns that aggregate statistics. Specifically, the script reports the minimum, average, maximum, and standard deviation for each problem size. This script is useful especially (though not exclusively) when trying to determine the impact of relatively minor changes to the code, or other small optimizations that may be difficult to distinguish from "noise." One way this "noise" manifests is that a test executable may run slightly slower or faster for all problem sizes (and all implementations) tested by the executable over the life of a single execution. The cause of these minor across-the-board pertubations in the overall performance signatures is unknown, though we hypothesize that it may relate to any number of issues such as operating system scheduling, where in memory the program is loaded, or how the CPU clock frequency is throttled at the time of execution. Regardless of the source of these subtle performance anomalies, the statistical properties reported by the irun.py script help the user to more precisely characterize the underlying performance exhibited by any given test driver, which allows him or her to make better judgments about the true difference in performance between two implementations, or minor changes within a single implementation. commit 807a654888117fb3a27ea36384f1c1c11b882cd5 Author: Field G. Van Zee Date: Thu Sep 20 15:41:05 2018 -0500 Fixed confusing configure message for libmemkind. Details: - Corrected feedback echoed to user by configure when libmemkind is found but not explicitly requested. In these cases, configure would echo a message that it had received an explicit request to enable libmemkind, which was not accurate, even if the end result was the same--that libmemkind is enabled by default when it is found. Thanks To Devangi Parikh for reporting this issue. commit 02adab427c779b0aaf38a5877a5f0246b1909e8f Author: Devangi N. Parikh Date: Thu Sep 20 14:38:50 2018 -0400 Created a 'thunderx2' subdirectory within test/studies Details: - Created a 'thunderx2' subdirectory within test/studies to house various level-3 test driver used to measure performance on ThunderX2. commit d7537fb51dac0636591fc7c68261a2322642ab3c Merge: dad07245 c03728f1 Author: Field G. Van Zee Date: Wed Sep 12 15:24:20 2018 -0500 Merge branch 'dev' commit dad07245dbcfaf35232ec379ba756eb133c361c1 Author: Devangi N. Parikh Date: Wed Sep 12 04:16:58 2018 -0500 Fixed yet another bug in runme script in test/studies Details: - Fixed another copy-paste bug commit e669057fe35f2037d8111af687d84a0ecf6d7a2a Author: Devangi N. Parikh Date: Tue Sep 11 22:29:42 2018 -0500 Fixed bug in runme script in test/studies Details: - Fixed bug in runme script for skx studies that set the number of threads incorrectly commit 232fdc3df3e01ae3f86d53767bd14eb93b511e6e Author: Devangi N. Parikh Date: Mon Sep 10 18:45:50 2018 -0500 Updated runme script in test/studies. Details: - Updated runme script for skx studies to run multithreading tests on 1 and 2 sockets. commit c03728f1f45edb5e434db90ab8a77ba0184a682b Author: Field G. Van Zee Date: Mon Sep 10 17:54:27 2018 -0500 Various minor cleanups. Details: - Rewrote bli_winsys.c to define bli_setenv() and bli_sleep() unconditionally, but differently for Windows and non-Windows, but then disabled the definition of bli_setenv() entirely since BLIS no longer needs to set environment variables. Updated bli_winsys.h accordingly, and call bli_sleep() from within testsuite instead of sleep() directly. - Use #if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS != 200809L) instead of #if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0) when guarding against local definition of pthread barrier in testsuite. (The description for unistd.h implies that _POSIX_BARRIERS should always be set to 200809L when barriers are supported, though I won't be surprised if we encounter a case in the future where it is set to something else such as 1 while still supported.) - Removed old _VERS_CONF_INST definitions and installation rules in top-level Makefile. These are no longer needed because we no longer output libraries with the version and configuration name as substrings. - Comment/whitespace updates in Makefile, config.mk.in, common.mk, configure, bli_extern_defs.h, and test_libblis.h. - Added mention of 1m to README.md and other trivial tweaks. commit e249a00a82908054ecd307cf602c8801275903e8 Author: Field G. Van Zee Date: Mon Sep 10 16:48:35 2018 -0500 Imported skx dgemm ukernel from skx-redux branch. Details: - Added the new bli_dgemm_skx_asm_16x14.c microkernel from the skx-redux branch, along with appropriate blocksizes in bli_cntx_init_skx.c and a prototype in bli_kernels_skx.h. (Devin has not yet written the sgemm analague, so for now we will continue using the older sgemm ukernel.) - Updated frame/include/bli_x86_asm_macros.h with a minor change that was present within the skx-redux branch. commit e93b01ff60bf9742baa5eefd93e208d1219e7a43 Author: Isuru Fernando Date: Sun Sep 9 15:57:43 2018 -0500 Windows DLL support (#246) * Enable shared * Enable rdp * Add support for dll * Use libblis-symbols.def * Fix building dlls * Fix libblis-symbols.def * Fix soname * Fix Makefile error * Fix install target * Fix missing symbols * Add BLIS_MINUS_TWO * Add path to dll * Fix OSX soname * Add declspec for dll * Add -DBLIS_BUILD_DLL * Replace @enable_shared@ in config * switch to auto for now * blis_ -> bli_ * Remove BLIS_BUILD_DLL in make check * change auto->haswell * enable_shared_01 * Add wno-macro-redefined * print out.cblat3 * BLIS_BUILD_DLL -> BLIS_IS_BUILDING_LIBRARY * Use V=1 * Remove fpic for windows * Remember LIBPTHREAD * Remove libm for windows * Remember AR * Fix remembering libpthread * Add Wno-maybe-uninitialized in only gcc * Don't do blastest for shared for now * Fix install target And remove unnecessary change * test auto and x86_64 * Fix install target again * Use IS_WIN variable * Remove leading dot from LIBBLIS_SO_MAJ_EXT * Make is_win yes/no * Add comments for windows builds * Change if else blocks location commit 1330d5c4bc3b644ec0af54c3939a5b9f00eacd9c Author: Field G. Van Zee Date: Fri Sep 7 19:37:59 2018 -0500 Employ "user" cflags for tl Makefile test targets. Details: - Use get-user-cflags-for() to generate cflags when compiling BLAS test drivers and BLIS testsuite from top-level Makefile. Meant to include these changes in previous commit (4b5437e). Thanks to Isuru Fernando for pointing out this oversight. commit 4b5437ec7afb2befffffbb83f7872bcb4fc61e51 Author: Field G. Van Zee Date: Fri Sep 7 17:24:32 2018 -0500 Define a cpp macro specific to BLIS compilation. Details: - Tweaked the cflags functions in common.mk so that a new preprocessor macro, BLIS_IS_BUILDING_LIBRARY, is defined, but only when BLIS itself is being built. This macro will not be defined when, for example, the testsuite or example code compiles code local to those applications. This was done in part by defining a new cflags function get-user-cflags-for(), which is now the designated function for application Makefiles if they wish to inherit a basic set of CFLAGS from BLIS. (The compiler flags returned are identical to that of get-frame-cflags-for() except that -DBLIS_IS_BUILDING_LIBRARY is omitted.) - Updated all test driver-like makefiles to call get-user-cflags-for() instead of get-frame-cflags-for(). commit cc2cca4f56eb30212a0dce3e5c121e64d9e59560 Merge: e19e7212 fb81c7fc Author: Field G. Van Zee Date: Thu Sep 6 17:12:13 2018 -0500 Merge branch 'dev' commit e19e7212872da3d464734199193436faa51f0da0 Merge: 97965b09 b3d0702c Author: Jeff Hammond Date: Thu Sep 6 14:58:49 2018 -0700 Merge pull request #244 from kali/pthread-barrier-osx add an adhoc impl for pthread_barrier commit b3d0702cf2ef6dda19a23dd8a677be1b6f73c322 Merge: 4e7d0670 97965b09 Author: Jeff Hammond Date: Thu Sep 6 14:58:23 2018 -0700 Merge branch 'master' into pthread-barrier-osx commit 4e7d06700f176a62952d7d51e41fdcbc6b7a9d5f Author: Mathieu Poumeyrol Date: Thu Sep 6 23:48:31 2018 +0200 second __APPLE__ commit fb81c7fc665d68e6a2add163feb29acc0bce8936 Author: Field G. Van Zee Date: Thu Sep 6 16:29:39 2018 -0500 Defined cortexa53 sub-configuration. Details: - Added a new sub-configuration 'cortexa53', which is a mirror image of cortexa57 except that it will use slightly different compiler flags. Thanks to Mathieu Poumeyrol for making this suggestion after discovering that the compiler flags being used by cortexa57 were not working properly in certain OS X environments (the fix to which is currently pending in pull request #245). commit 24ecc0d94aaa9ab4df1ae6d199c4ec6d7783169f Author: Mathieu Poumeyrol Date: Thu Sep 6 22:10:16 2018 +0200 use _POSIX_BARRIERS instead of __APPLE__ commit 97965b09059a610db06fb7a22bdfa79c0d37d673 Author: Mathieu Poumeyrol Date: Thu Sep 6 21:10:29 2018 +0200 cortexa9 and cortexa53 travis build + qemu test (#245) commit a6802eab7d94b5a9de633c53beca8245b74f5dc6 Author: Mathieu Poumeyrol Date: Thu Sep 6 17:16:35 2018 +0200 reinstantiate test on macos commit d688a2b7e5a19cba44ea398a99e325e19b8fce50 Author: Mathieu Poumeyrol Date: Thu Sep 6 15:25:16 2018 +0200 add an adhoc impl for pthread_barrier commit ab9f9e684dc3ffbb70cc45b21c67af5d916919e5 Author: Field G. Van Zee Date: Thu Aug 30 15:14:02 2018 -0500 CHANGELOG update (0.4.1) commit 10fd614031307c46db3d893528d4e5fc31f490b3 Author: Field G. Van Zee Date: Thu Aug 30 15:13:59 2018 -0500 Version file update (0.4.1) commit 08dd67c4b21244851f8416bd59159bea7a9c5b3d Author: Field G. Van Zee Date: Thu Aug 30 15:12:13 2018 -0500 ReleaseNotes.md update in advance of next version. commit 4fa4cb0734e7de6505b5d6f1aeef3a5d5c89dcbb Author: Field G. Van Zee Date: Wed Aug 29 18:06:41 2018 -0500 Trivial comment header updates. Details: - Removed four trailing spaces after "BLIS" that occurs in most files' commented-out license headers. - Added UT copyright lines to some files. (These files previously had only AMD copyright lines but were contributed to by both UT and AMD.) - In some files' copyright lines, expanded 'The University of Texas' to 'The University of Texas at Austin'. - Fixed various typos/misspellings in some license headers. commit b051ffb815baf6c3ece2b5118b679fd9219d5780 Merge: 6f33d9de aaa549f4 Author: Field G. Van Zee Date: Wed Aug 29 17:06:48 2018 -0500 Merge branch 'dev' commit 6f33d9de21fbc2f579846b9104fb9d513753f79c Author: Mathieu Poumeyrol Date: Wed Aug 29 23:48:22 2018 +0200 fix compilation of armv7a kernels (#242) commit 8199e339aefdd27019c7f3d8c99818d375d5400b Author: Field G. Van Zee Date: Mon Aug 27 07:00:12 2018 -0500 Added testsuite threading to input.general.fast. Details: - Added lines associated with the testsuite's new threading option to input.general.fast. This change was intended for the previous commit (10d0735). commit 10d07357afbb2d468837aa97369ef9a6d0610817 Author: Field G. Van Zee Date: Sun Aug 26 20:34:30 2018 -0500 Better thread safety; added threading to testsuite. Details: - Replaced critical sections that were conditional upon multithreading being enabled (via pthreads or OpenMP) with unconditional use of pthreads mutexes. (Why pthreads? Because BLIS already requires it for its initialization mechanism: pthread_once().) This was done in bli_error.c, bli_gks.c, bli_l3_ind.c. Also, replaced usage of BLIS's mtx_t object and bli_mutex_*() API with pthread mutexes in bli_thread.c. The previous status quo could result in a race condition if the application called BLIS from more than one thread. The new pthread-based code should be completely agnostic to the application's threading configuration. Thanks to AMD for bringing to our attention the need for a thread-safety review. - Added an option to the testsuite to simulate application-level multithreading. Specifically, each thread maintains a counter that is incremented after each experiment. The thread only executes the experiment if: counter % n_threads == thread_id. In other words, the threads simply take turns executing each problem experiment. Also, POSIX guarantees that fprintf() will not intermingle output, so output was switched to fprintf() instead of libblis_test_fprintf(). - Changed membrk_t objects to use pthread_mutex_t intead of mtx_t and replaced use of bli_mutex_init()/_finalize() in bli_membrk.c with wrappers to pthread_mutex_init()/_destroy(). - Changed the implementation of bli_l3_ind_oper_enable_only() to fix a race condition; specifically, two threads calling the function with the same parameters could lead to a non-deterministic outcome. - Added #include to bli_cpuid.c and moved the same in bli_arch.c. - Added 'const' to declaration of OPT_MARKER in bli_getopt.c. - Added #include to bli_system.h. - Added add-copyright.py script to automate adding new copyright lines to (and updating existing lines of) source files. commit aaa549f4d1e63929fe2bea023ce849253cfbbb42 Author: Field G. Van Zee Date: Sun Aug 26 20:13:51 2018 -0500 Minor update to configure --help (--sharedir option). Details: - Fixed/tweaked description for --sharedir=SHAREDIR option. commit 573b8ac373f821a65cc8afd51cdbe03b8ec01081 Author: Field G. Van Zee Date: Sun Aug 26 13:51:32 2018 -0500 Fixed copy-paste typo in previous commit. Details: - Fixed a typo in travis/do_testsuite.sh introduced in 62ea1d3. commit 62ea1d33d3bc1e890420a1e828b9d0e87e87533b Author: Field G. Van Zee Date: Sun Aug 26 13:35:53 2018 -0500 Fixed broken out-of-tree builds. Details: - Fixed stale filepaths to check-blastest.sh and check-blistest.sh in travis/do_testsuite.sh and travis/do_sde.sh. - Create a symbolic link to the 'config' directory so that the top-level Makefile can find the configs' make_defs.mk files during out-of-tree builds. - Added additional case handling to out-of-tree scenario to handle situations where files 'Makefile', 'common.mk', or 'config' exist but are not symbolic links. In such cases, configure warns the user and exits. - Homogenized various error messages throughout configure. - Belated thanks to Victor Eijkhout for requesting the feature added in 0f491e9 whereby lesser Makefiles can compile and link against an existing installation of BLIS. commit 0f491e994a7e14d4dfce26e6a51dba2bccad29a3 Author: Field G. Van Zee Date: Sat Aug 25 20:12:36 2018 -0500 Allow lesser Makefiles to reference installed BLIS. Details: - Updated the build system so that "lesser" Makefiles, such as those in belonging to example code or the testsuite, may be run even if the directory is orphaned from the original build tree. This allows a user to configure, compile, and install BLIS, delete the build tree (that is, the source distribution, or the build directory for out- of-tree builds) and then compile example or testsuite code and link against the installed copy of BLIS (provided the example or testsuite directory was preserved or obtained from another source). The only requirement is that make be invoked while setting the BLIS_INSTALL_PATH variable to the same installation prefix used when BLIS was configured. The easiest syntax is: make BLIS_INSTALL_PATH=/install/prefix though it's also permissible to set BLIS_INSTALL_PATH as an environment variable prior to running 'make'. - Updated all lesser Makefiles to implement the new aforementioned build behavior. - Relocated check-blastest.sh and check-blistest.sh from build to blastest and testsuite, respectively, so that if those directories are copied elsewhere the user can still run 'make check' locally. - Updated docs/Testsuite.md with language that mentions this new option of building/linking against an installed copy of BLIS. commit 36ff92ce0d3b428b15b6cddc6f5944afe22e43ec Author: Field G. Van Zee Date: Fri Aug 24 18:26:09 2018 -0500 Missing C++ compiler no longer fatal to configure. Details: - Changed configure so that the absence of any C++ compiler from the pre-defined search list does not result in an exit. Instead, in this situation, the found_cxx variable is assigned 'c++notfound' and the error message is changed to remind the user that C++ will not be available in the sandbox. Thanks to Devangi Parikh for reporting this issue. - Also tweaked the message when a C++ compiler *is* found to remind any would-be confused user that BLIS will only use C++ if it is needed by code in the sandbox. commit 658f0a129bdc565b072696b6ebddce501132091c Author: Field G. Van Zee Date: Fri Aug 24 17:49:37 2018 -0500 Fixed obscure integer size bug in va_arg() usage. Details: - Fixed a bug in the way that the variadic bli_cntx_set_l3_nat_ukrs() function was defined. This function is meant to take a microkernel id, microkernel datatype, microkernel address, and microkernel preference as arguments, and is typically called within the bli_cntx_init_*() function defined within a sub-configuration for initializing an appropriate context. The problem is with the final argument: the microkernel preference. These preferences are actually boolean values, 0 or 1 (encoded as FALSE or TRUE). Since the variadic function does not give the compiler any type information for any variadic arguments, they are "promoted" in the course of internal (macroized) processing according to default argument promotion rules. Thus, integer literals such as 0 and 1 become int and floating-point literals (such as 0.0 or 1.0) become double. Previous to this commit, we indicated to va_arg() that the ukernel preference was a 'bool_t', which is a typedef of int64_t on 64-bit systems. On systems where int is defined as 64 bits, no problems manifest since int is the same size as the type we passed in to va_arg(), but on systems where int is 32 bits, the ukernel preference could be misinterpreted as a garbage value. (This was observed on a modern armv8 system.) The fix was to interpret the bool_t value as int and then immediately typecast it to and store it as a bool_t. Special thanks to Devangi Parikh for helping track down this issue, including deciphering the use of va_arg() and its byzantine treatment of types. - Added explicit typecasts for all invocations of va_arg() in bli_cntx.c. commit e71dc389120b032e42091e4d1a928515ed6f7275 Author: Field G. Van Zee Date: Fri Aug 24 15:56:04 2018 -0500 Fixed a very minor memory leak in gks. Details: - Fixed a memory leak in the global kernel structure that resulted in 56 bytes per configured architecture (of which only 18 are presently supported by BLIS). The leak would only manifest if BLIS was initialized and then finalized before the application terminated. Thanks to Devangi Parikh for helping track down this leak. commit a7e3a5f9753468c8e665e6c5c3b38d22b7c92500 Author: Field G. Van Zee Date: Fri Aug 24 14:51:11 2018 -0500 Fixed uncallable bli_finalize(). Details: - Previously, bli_finalize_once()--which, like bli_init_once(), was implemented in terms of pthread_once()--was using the same pthread_once_t control object being used by bli_init(), thus guaranteeing that it would never be called as long as BLIS had already been initialized. This could manifest as a rather large memory leak to any application that attempted to finalize BLIS midway through its execution (since BLIS reserves several megabytes of storage for packing buffers per thread used). The fix entailed giving each function its own pthread_once_t object. Thanks to Devangi Parikh for helping track down this very quiet bug. commit a79c21c7c17fb4854fd24c73b81ec5543f74082d Author: Field G. Van Zee Date: Thu Aug 23 14:40:46 2018 -0500 Fixed cleanmk target post-1b0f8d6. Details: - Changed the cleanmk target to delete makefile fragments from their new home in obj/$(CONFIG_NAME). The old definition worked only because of a typo (REFERKN_PATH instead of REFKERN_PATH), and only in the non-verbose (V != 1) case. commit ffb57242f3eb1175c991fe1b492595fdaa175c27 Author: Field G. Van Zee Date: Wed Aug 22 18:22:41 2018 -0500 Cosmetic output changes to configure. Details: - Disable sandbox-related obj directory creation, directory mirroring, and makefile fragment generation when a sandbox is not enabled. - Prevent various duplicate actions by configure (such as those mentioned above for sandboxes above). commit ac17454aae9ad430f05aa7c156919c6c695c300c Merge: a77bec76 7afd095a Author: Field G. Van Zee Date: Wed Aug 22 15:34:53 2018 -0500 Merge branch 'master' into dev commit a77bec766a01e42f13f8cacbec8c4cbde8ecefef Author: Field G. Van Zee Date: Wed Aug 22 15:31:29 2018 -0500 Whitespace changes, minor renames in build system. Details: - Minor whitespace cleanup, mostly in the form of spaces -> tabs. - Shortened certain variables' _FRAGMENT_ infixes to _FRAG_ in common.mk. commit 1b0f8d60d1132b56485cc202ebf1246898d3a2a4 Author: Devin Matthews Date: Wed Aug 22 13:19:29 2018 -0700 Generate makefile fragments in build tree (#240) * Make src dir read-only in out-of-tree build test. * Generate makefile fragments in the build tree. commit 7afd095af33690e0175903852b354c9fe46993f6 Author: Field G. Van Zee Date: Wed Aug 22 14:58:24 2018 -0500 Removed skx from code snippet in previous commit. Details: - The docs/ConfigurationHowTo.md document was written with examples that did not yet contain the skx sub-configuration, but the previous commit included bli_arch.c code copied and pasted from a recent commit that does support skx. To keep things consistent, I've removed skx from the recently-added ConfigurationHowTo.md code snippet. commit 48211a980d78673133076e8eced1007b1980f5e6 Author: Field G. Van Zee Date: Wed Aug 22 14:55:02 2018 -0500 Update to docs/ConfigurationHowTo.md. Details: - Added missing language directing the reader to modify the config_name string array in bli_arch.c when adding a new sub-configuration. Thanks to Devangi Parikh for reporting this missing section. commit 65c9096c6e21f3dc2947fa12be9ea3034f8662dc Author: Field G. Van Zee Date: Fri Aug 17 11:44:12 2018 -0500 Fixed broken -p option to configure. Details: - Fixed some stale code that was preventing the -p option to configure from working as expected (though the --prefix option was unaffected). This bug was was most likely introduced in 7e5648c (May 7 2018). Thanks to Dave Love for reporting this issue. commit e358d5e497c77b305af462f44266370a596445e2 Author: Field G. Van Zee Date: Thu Aug 16 12:18:45 2018 -0500 README.md update (Funding section). commit a61dd5e7bcf23f7237d407a5e06dd44e1bec9ad0 Author: Field G. Van Zee Date: Tue Aug 14 17:08:03 2018 -0500 Changed 'test' target to be more like 'check'. Details: - Redefined the 'test' make target in the top-level Makefile so that the final result ("everything passed" or at "least one failure") is echoed to stdout. Note that 'check' is unchanged, and thus is now effectively a fast version of 'test'. - Updated docs/BuildSystem.md to reflect the above change. commit ce5c3a198a7ae1ca676c27da4541d51ed19d16e1 Merge: 4f6745d6 0bbe69d5 Author: Field G. Van Zee Date: Tue Aug 14 16:52:19 2018 -0500 Merge branch 'master' of github.com:flame/blis commit 4f6745d68a2c66511695eff0beb00a82ffc6bbbe Author: Field G. Van Zee Date: Tue Aug 14 16:50:47 2018 -0500 Fixed link error when building only shared library. Details: - Fixed a linker error that occurred when attempting to compile and link the testsuite and/or BLAS test drivers after having configured BLIS to only generate a shared library (no static library). The chosen solution involved (1) adding the local library path, $(BASE_LIB_PATH), to the search paths for the shared library via the link option -Wl,-rpath,$(BASE_LIB_PATH). (2) adding a local symlink to $(BASE_LIB_PATH) that uses the .so major version number so that ld would find the shared library at execution time. Thanks to Sajid Ali for reporting this issue, to Devin Matthews for pointing out the need for the -rpath option, and to Devangi Parikh for helping Sajid isolate the problem. - Added #include to bli_system.h to avoid a compiler warning resulting from using toupper() from bli_string.c without a prototype. Thanks again to Sajid Ali, whose build log revealed this compiler warning. - Added '*.so.*' to .gitignore. - CREDITS file update. commit 0bbe69d5ed260849297d8f2d35b7668d167482ed Author: Devangi N. Parikh Date: Tue Aug 14 14:49:58 2018 -0500 Updated plotting scripts in test/studies. Details: - Fixed indexing on plots to correspond to the removal of dtime in the test drivers. commit e93e0e149e087e08eca2885f1a748a4e88ffe55d Author: Field G. Van Zee Date: Tue Aug 7 15:54:30 2018 -0500 Removed redefinition of axpyv, scal2v func types. Details: - Removed a stray/accidental redefinition of axpyv and scal2v function types in frame/1d/bli_l1d_ft.h (probably a copy/paste leftover during development). commit 1deb33bd16349aaa643694d1bd685ff8a9a5f476 Author: Field G. Van Zee Date: Tue Aug 7 15:02:50 2018 -0500 Updated penryn kernels to use new _ker_ft type names. Details: - Updated older _ft kernel type suffixes used within penryn level-1v and -1f kernels to use the newer _ker_ft suffix that was introduced in 0175483. (Thank you Travis CI.) commit 9cb0b023ca91abdc056d726cdc070062e4954611 Author: Field G. Van Zee Date: Tue Aug 7 14:21:07 2018 -0500 INSTALL file update. commit 017548314f3f78f66fbe3264509ac5302bd8d62b Author: Field G. Van Zee Date: Tue Aug 7 14:13:25 2018 -0500 Replaced function chooser macros w/ func ptr arrays. Details: - Previously, most object API functions (_oapi.c) used a function chooser macro that would expand out to an if-elseif-elseif-else conditional that used a num_t datatype to call the appropriate type-specific API (_tapi.c). This always felt a little hackish, and would get in the way somewhat of addig support for new num_t datatypes in the future. So, I've replaced that functionality with code that queries a function pointer that is then typecast appropriately. This model of function calling was already pervasive for kernels queried from the cntx_t structure. It was also already in use in various other functions, such as macrokernels, and this commit simply extends that pattern. - The above change required many new files, mostly header files, that define the function types (mostly _ft.h) for the queriable functions as well as some source files to define the function pointer arrays and their corresponding query functions (_fpa.c). Various other function types, mostly for kernel function types, were renamed to reduce the potential for confusion with the function types for expert and basic (non-expert) typed API functions. - Removed definitions for all of the "bli_call_ft_*()" function chooser macros from bli_misc_macro_defs.h. commit addce089664561f9f63efa6f107e58fc48d29871 Author: Field G. Van Zee Date: Mon Aug 6 13:18:20 2018 -0500 Format spec and other updates in test, test/3m4m. Details: - Removed the dtime (delta time, or wallclock time) column from the matlab output of all test drivers in test, test/3m4m, test/studies. This value was rarely (if ever) really needed and usually only served to take up screen space. - Updated format specifier in test/studies/skx to use %7.2f instead of %6.3f. - For the test drivers in 'test' directory, added an initial line of output that sets last entry of matlab matrix to zero in order to induce a pre-allocation of the entire array of performance results. commit 94d5ef42c833a4d43e50a80d46dddbd7a56d2db6 Author: Field G. Van Zee Date: Sat Aug 4 15:57:17 2018 -0500 Adjusted gflops format spec in testsuite, test/3m4m. Details: - Changed the format specifier for the gflops column in the testsuite output from %7.3f to %7.2f. This was done mainly to keep the output aligned properly when the expected perfomance exceeded 1000 gflops. Also, two decimal places still conveys plenty of precision for all practical applications, including just eyeballing performance deltas between two executions (let alone two implementations). - Changed the format specifier for gflops in the test/3m4m drivers from %6.3f to %7.2f (for the same reasons listed above). commit c7ff06bae92b9b6c6656f2030d13486b95417821 Merge: 6074082c ebe998d0 Author: Devangi N. Parikh Date: Wed Aug 1 14:20:41 2018 -0500 Merge branch 'master' of https://github.com/flame/blis commit 6074082cd359dd775ef72478f8f3a281c5a6a6f9 Author: Devangi N. Parikh Date: Wed Aug 1 13:30:51 2018 -0500 Fixed bug in bli_cntx_set_packm_ker_dt() implementation. Details: - Fixed bug in static function bli_cntx_set_[packm/unpackm]_ker_dt(), which were incorrectly calling bli_cntx_get_[packm/unpackm]_ker_dt to get the corresponding func_t. commit ebe998d06cc56a9a9d66990b6ebf683d6fd0efdf Author: Field G. Van Zee Date: Wed Aug 1 13:24:00 2018 -0500 Fixed typos in BuildSystem.md from previuos commit. commit e72a344e94c5ae253f69b60f41d92ca89a5d1d1c Author: Field G. Van Zee Date: Wed Aug 1 13:00:38 2018 -0500 Added table of 'make' targets to BuildSystem.md. Details: - Added a new section to BuildSystem.md that describes the most useful make targets defined in the top-level Makefile. commit 4f60d0288e00586dc921ff57db851f1266ff8e70 Author: Field G. Van Zee Date: Mon Jul 30 19:22:57 2018 -0500 README.md, comment updates. Details: - Added links, and sandbox language to README.md. - Adjusted some comments in high-level level-3 object functions to make clear what bli_thread_init_rntm() does. commit 455d3f49e5c8362395be14c79e6adb5123e29623 Author: Field G. Van Zee Date: Sun Jul 29 18:31:29 2018 -0500 Edits to object/typed API, multithreading docs. commit 922a1c05e06f52c97fb369870dce07233e61c4c9 Author: Field G. Van Zee Date: Sat Jul 28 20:15:55 2018 -0500 More tweaks to README.md. commit a7a0cf2b5d9f1dea5061c0f20eeaf371dfd4ea12 Author: Field G. Van Zee Date: Sat Jul 28 16:59:31 2018 -0500 More edits to docs/Multithreading.md. commit be21d0cf68c330fd0d2048465a43ddc59d0b9d6c Author: Field G. Van Zee Date: Sat Jul 28 16:46:51 2018 -0500 Fixed typos in docs/Multithreading.md. commit eac07c7b4f7a41c68d63f1e67141b2b58009609e Author: Field G. Van Zee Date: Sat Jul 28 16:45:28 2018 -0500 Edits to docs/Multithreading.md. commit 5438375a032273b46ae626fee909ffc05f48ab72 Author: Field G. Van Zee Date: Sat Jul 28 16:34:21 2018 -0500 Fixed link in README.md. commit 1f1a237d3f0b24d71ce2d7ee52d8a84f8e6a29ad Author: Field G. Van Zee Date: Sat Jul 28 16:33:28 2018 -0500 Fixed links in BLISTypedAPI.md. commit 89c8806e3aa49310f36c0314c5f6956c83a627a1 Author: Field G. Van Zee Date: Sat Jul 28 16:30:56 2018 -0500 Minor doc fixes to previous commit. commit b8c7574f84873b9c408f70c29c41ce464df57c2d Author: Field G. Van Zee Date: Sat Jul 28 16:27:09 2018 -0500 README.md, typed/object API updates. Details: - Updated the typed and object APIs to include language on the rntm_t parameters in the expert interfaces. - Updated README to include link to object API. commit 29c34c4adb02d91fb34d1ccc0e821d6cfb7ce5c5 Author: Field G. Van Zee Date: Fri Jul 27 16:26:19 2018 -0500 CREDITS file update. commit 55a04edf52ac4f16c51b738bc884684adc1f1777 Author: Field G. Van Zee Date: Fri Jul 27 16:10:46 2018 -0500 CHANGELOG update (0.4.0) commit 4ad61ce905d250dd3ef197f0d06a69ce6d99d309 Author: Field G. Van Zee Date: Fri Jul 27 16:10:43 2018 -0500 Version file update (0.4.0) commit b86cf13793b07f35c027a56c9faec8f4b6279d3e Author: Field G. Van Zee Date: Fri Jul 27 16:08:21 2018 -0500 Release Notes update in advance of next version. commit a8b4084a0e04e47ac02ceae93a2018f5363e1205 Author: Field G. Van Zee Date: Fri Jul 27 16:07:26 2018 -0500 CREDITS file update. commit 8e10cac5f388ac961c3d77b0a465214e7c9dc91a Author: Field G. Van Zee Date: Fri Jul 27 14:45:35 2018 -0500 Updates to CREDITS, RELEASING, config/README.md. Details: - Added individuals' github handles to CREDITS file. - Updated RELEASING, config/README.md files. commit 401b69c8f26a86726ac5e1fb4f9fc2d2098ef204 Author: Field G. Van Zee Date: Wed Jul 25 17:55:13 2018 -0500 More indentation in docs/ConfigurationHowTo.md. commit 1c6a1b921ef96999bb449d657cca6d9a556f7245 Author: Field G. Van Zee Date: Wed Jul 25 17:14:58 2018 -0500 Trying new indentation in ConfigurationHowTo.md. Details: - Modified a few sections to take advantage of a feature of markdown that allows a bullet or enumeration to have multiple paragraphs. This is a trial run to make sure the indentation looks good when rendered in a web browser. commit 71f978719527fcf17617cb234e48bf349a76c12d Author: Field G. Van Zee Date: Wed Jul 25 15:55:36 2018 -0500 Whitespace changes to macrokernels' func ptr defs. commit 87d57c31c2bfcf4609dfe31ce915e9345150e613 Author: Field G. Van Zee Date: Wed Jul 25 14:20:18 2018 -0500 Various minor updates to typed, object API docs. commit fb6e16268aaafbab2fd78d47cbf821e2152261fd Author: Field G. Van Zee Date: Wed Jul 25 14:17:28 2018 -0500 Consolidated prototypes in bli_l1v_tapi.h. Details: - Consolidated typed API function prototypes in bli_l1v_tapi.h by leveraging identical function signatures between operations. - Removed 'restrict' keyword since it is not actually present in the function definitions. commit af60d738f21340ccb0903e6c87dbf6af4fc44fc0 Author: Field G. Van Zee Date: Tue Jul 24 15:35:52 2018 -0500 Finished object creation part of BLISObjectAPI.md. Details: - Filled in remaining section on object creation function reference of BLISObjectAPI.md. All object management functions demonstrated as part of the example code in examples/oapi are now documented, as well as some other functions that are not shown in the example code. - Updated variuos links (mostly in function index) to correctly point to the object API reference instead of the typed API reference. - Added documentation to getijm, setijm. commit 8217a6a3b68382c62f016c658d337e6086112fef Author: Field G. Van Zee Date: Tue Jul 24 13:13:10 2018 -0500 Moved sandbox README.md to docs/Sandboxes.md. Details: - Relocated sandbox/ref99/README.md to docs/Sandboxes.md and made minor edits to the document. commit b7db29332394324ffd1a73c3847a75e9a5b38c8d Author: Field G. Van Zee Date: Thu Jul 19 11:14:30 2018 -0500 Explicitly typecast return vals in static funcs. Details: - Added explicit typecasting to various functions (mostly static functions), primarily those in bli_param_macro_defs.h, bli_obj_macro_defs.h, bli_cntx.h, bli_cntl.h, and a few other header files. - This change was prompted by feedback from Jacob Gorm Hansen, who reported that #including "blis.h" from his application caused a gcc to output error messages (relating to types being returned mismatching the declared return types) when used via the C++ compiler front-end. This is the first pass of fixes, and we may need to iterate with additional follow-up commits (#233). commit fa08e5ead95f9d757af6ab5b095a8bf131e3874d Author: Field G. Van Zee Date: Tue Jul 17 19:02:15 2018 -0500 Fixed minor issues in ecbebe7 with mt disabled. Details: - Fixed an unused variable warning in frame/base/bli_rntm.c when multithreading is disabled. - Fixed a missing variable declaration in bli_thread_init_rntm_from_env() when multithreading is disabled. commit ecbebe7c2e43950dfa369f71c2b83cabe348a046 Author: Field G. Van Zee Date: Tue Jul 17 18:37:32 2018 -0500 Defined rntm_t to relocate cntx_t.thrloop (#235). Details: - Defined a new struct datatype, rntm_t (runtime), to house the thrloop field of the cntx_t (context). The thrloop array holds the number of ways of parallelism (thread "splits") to extract per level-3 algorithmic loop until those values can be used to create a corresponding node in the thread control tree (thrinfo_t structure), which (for any given level-3 invocation) usually happens by the time the macrokernel is called for the first time. - Relocating the thrloop from the cntx_t remedies a thread-safety issue when invoking level-3 operations from two or more application threads. The race condition existed because the cntx_t, a pointer to which is usually queried from the global kernel structure (gks), is supposed to be a read-only. However, the previous code would write to the cntx_t's thrloop field *after* it had been queried, thus violating its read-only status. In practice, this would not cause a problem when a sequential application made a multithreaded call to BLIS, nor when two or more application threads used the same parallelization scheme when calling BLIS, because in either case all application theads would be using the same ways of parallelism for each loop. The true effects of the race condition were limited to situations where two or more application theads used *different* parallelization schemes for any given level-3 call. - In remedying the above race condition, the application or calling library can now specify the parallelization scheme on a per-call basis. All that is required is that the thread encode its request for parallelism into the rntm_t struct prior to passing the address of the rntm_t to one of the expert interfaces of either the typed or object APIs. This allows, for example, one application thread to extract 4-way parallelism from a call to gemm while another application thread requests 2-way parallelism. Or, two threads could each request 4-way parallelism, but from different loops. - A rntm_t* parameter has been added to the function signatures of most of the level-3 implementation stack (with the most notable exception being packm) as well as all level-1v, -1d, -1f, -1m, and -2 expert APIs. (A few internal functions gained the rntm_t* parameter even though they currently have no use for it, such as bli_l3_packm().) This required some internal calls to some of those functions to be updated since BLIS was already using those operations internally via the expert interfaces. For situations where a rntm_t object is not available, such as within packm/unpackm implementations, NULL is passed in to the relevant expert interfaces. This is acceptable for now since parallelism is not obtained for non-level-3 operations. - Revamped how global parallelism is encoded. First, the conventional environment variables such as BLIS_NUM_THREADS and BLIS_*_NT are only read once, at library initialization. (Thanks to Nathaniel Smith for suggesting this to avoid repeated calls getenv(), which can be slow.) Those values are recorded to a global rntm_t object. Public APIs, in bli_thread.c, are still available to get/set these values from the global rntm_t, though now the "set" functions have additional logic to ensure that the values are set in a synchronous manner via a mutex. If/when NULL is passed into an expert API (meaning the user opted to not provide a custom rntm_t), the values from the global rntm_t are copied to a local rntm_t, which is then passed down the function stack. Calling a basic API is equivalent to calling the expert APIs with NULL for the cntx and rntm parameters, which means the semantic behavior of these basic APIs (vis-a-vis multithreading) is unchanged from before. - Renamed bli_cntx_set_thrloop_from_env() to bli_rntm_set_ways_for_op() and reimplemented, with the function now being able to treat the incoming rntm_t in a manner agnostic to its origin--whether it came from the application or is an internal copy of the global rntm_t. - Removed various global runtime APIs for setting the number of ways of parallelism for individual loops (e.g. bli_thread_set_*_nt()) as well as the corresponding "get" functions. The new model simplifies these interfaces so that one must either set the total number of threads, OR set all of the ways of parallelism for each loop simultaneously (in a single function call). - Updated sandbox/ref99 according to above changes. - Rewrote/augmented docs/Multithreading.md to document the three methods (and two specific ways within each method) of requesting parallelism in BLIS. - Removed old, disabled code from bli_l3_thrinfo.c. - Whitespace changes to code (e.g. bli_obj.c) and docs/BuildSystem.md. commit 323eaaab99752858b12e81e2eb8e416f009a3028 Author: Devangi N. Parikh Date: Fri Jul 13 11:40:06 2018 -0500 Removed left over code from plotting scripts. commit 60c197736495b47ce974ffb9b43874d1ebcfe78c Author: Field G. Van Zee Date: Thu Jul 12 19:22:14 2018 -0500 Documented accessor functions in BLISObjectAPI.md. Details: - Added documentation to docs/BLISObjectAPI.md for a handful of commonly-used obj_t accessor functions. - Minor updates to docs/BLISTypedAPI.md. commit 77327ad796e11ef67df0cc91d45ed663598ba4df Merge: 73b0b2a3 9fef8575 Author: Devangi N. Parikh Date: Thu Jul 12 17:09:33 2018 -0500 Merge branch 'master' of https://github.com/flame/blis commit 73b0b2a3ac1be6dfbe85c116886b4e29d98ac945 Author: Devangi N. Parikh Date: Thu Jul 12 16:53:10 2018 -0500 Created hardware-specific test driver directory. Details: - Created a 'studies' subdirectory within 'test' to be used to house test drivers, makefiles, run scripts, matlab plot code, and related files that have been customized for collecting performance data on specific host machines or product lines. This new setup will help us catalog, track, and share test driver materials over time, and in a way that facilitates reproducibility. - Created an 'skx' subdirectory within 'test/studies' to house various level-3 test driver files used to measure performance on SkylakeX nodes (specifically, those nodes used by TACC's stampede2 system). commit 9fef85756d15ee0f977fff6e57acd01c20cba184 Author: Field G. Van Zee Date: Wed Jul 11 18:40:30 2018 -0500 Cleaned up loose ends in BLISObjectAPI.md. Details: - Deleted some lines from the API function signatures that did not belong (and were only left over from the copy-paste of the typed API). - Fixed some paragraph-in-bullet indentation. commit 80ddeae4629022b69fdf1f1b053a1fcba643c40c Author: Field G. Van Zee Date: Wed Jul 11 18:31:57 2018 -0500 Added BLISObjectAPI.md to docs. Details: - Added first draft of BLISObjectAPI.md. (Object management section is still missing.) - Small fixes to BLISTypedAPI.md found while writing BLISObjectAPI.md. - In various .md files, changed ``` verbatim blocks to language attributes (e.g. ```c for C code). commit 038442add39ce629fee0d960b212ce0c95138d46 Author: Field G. Van Zee Date: Wed Jul 11 12:24:18 2018 -0500 Added -lpthread to makefile example in BuildSystem.md. Details: - Added missing pthreads library linking to example makefile in docs/BuildSystem.md, as well as similar language to build requirements at the beginning of the document. Thanks to Stefanos Mavros for bringing this to our attention. - Updated CREDITS file. commit bf10d8624e7b5902c9d9189c7c93f318b8e1b9a5 Author: Field G. Van Zee Date: Mon Jul 9 18:40:13 2018 -0500 Small updates to KernelsHowTo.md, BLISTypedAPI.md. Details: - Minor updates to BLISTypedAPI.md, mostly to bring terminology up-to-date with the new "typed API" classification. - Added contents section to KernelsHowTo.md. commit 1fd3bce59e43b422e62f9684bca9d1296a29edc3 Author: Field G. Van Zee Date: Mon Jul 9 18:20:11 2018 -0500 Further updates to KernelsHowTo.md, BLISTypedAPI.md. Details: - Added missing level-1v operations to BLISTypedAPI (e.g. axpbyv, xpbyv). - Updated broken linkes in KernelsHowTo.md based on misnamed anchors. - Other minor changes. commit c40d30a6c920bd2e5a8353a3cd07a7e2b2265758 Author: Field G. Van Zee Date: Mon Jul 9 17:55:54 2018 -0500 Updated KernelsHowTo.md, BLISTypedAPI.md. Details; - Added missing (basic) information in KernelsHowTo.md for level-1f and level-1v kernels. - Updated section regarding contexts. commit f8913c2bf91c0e0fb4e68aedf64a242a19db92a0 Author: Field G. Van Zee Date: Sat Jul 7 20:35:13 2018 -0500 Fixed outdated scalv() calls in penryn l1f kernels. Details: - Fixed stale calls to dscalv() from the dotxf and dotxaxpyf penryn kernels that were not updated during the basic/expert API separation in e88aeda. commit e78e71d549ac17ecd52c7b33008df1cd78f1b59e Author: Field G. Van Zee Date: Sat Jul 7 20:18:09 2018 -0500 Added README.md mention/link to examples/tapi. Details: - Added language to README.md to bring the reader's attention to the example code for the typed API (in addition to those for the object API). commit 419ffb158573a26bfec47bac73e4394e7926a7b8 Author: Field G. Van Zee Date: Sat Jul 7 20:14:23 2018 -0500 Updates to README.md. Details: - Updated wiki links according to renamed/relocated files in 'docs'. - Converted links to relative paths. - Added link to docs/Multithreading.md. commit 7d3e8a7e5f1ec299d009fb6c9071f0c1b089b460 Author: Field G. Van Zee Date: Sat Jul 7 20:01:29 2018 -0500 Reverted docs/*.md links to relative paths. Details: - Within the documents in docs/*.md, reverted links to other local documents to relative paths. - Fixed some links/documents that did not yet have the '.md' suffix. - Testing whether we can use relative links ('docs/BLISTypedAPI.md') from within README.md. commit d97c862c2b9170d774f414e63ae365488fffb4f5 Author: Field G. Van Zee Date: Sat Jul 7 19:40:41 2018 -0500 Updated links (URLs) in docs/*.md. Details: - Updated most markdown links in the documents/wikis to use absolute paths instead of the relative paths that were in use previously. A few links were not updated, except for adding a ".md" to reflect the documents' new names, in order to test whether relative linking still works. commit 3a0c12135875e0fb04de9798664e4fae632d994e Merge: 2c7960c8 bcacddfa Author: Field G. Van Zee Date: Sat Jul 7 16:51:38 2018 -0500 Merge branch 'dev' commit bcacddfad75b20969660606751eea6ead6c42ca9 Author: Field G. Van Zee Date: Sat Jul 7 16:45:29 2018 -0500 Added 'docs' directory with wiki markdown files. Details: - Exported all github wikis to a new 'docs' directory. - Renamed 'BLISAPIQuickReference' wiki to 'BLISTypedAPI' and removed all cntx_t* arguments from the (now non-expert) APIs (with the exception of the kernel APIs). - Added section to BuildSystem documenting new ARG_MAX hack. commit 3ee2bc0f7aa3b08da92331d64271bee99eaf8c1d Author: Field G. Van Zee Date: Sat Jul 7 16:02:16 2018 -0500 Renamed files that distinguish basic/expert APIs. Details: - Renamed various files that were previously named according to a "with context" or "without context" convention. For example, the following files in frame/3 were renamed: frame/3/bli_l3_oapi_woc.c -> frame/3/bli_l3_oapi_ba.c frame/3/bli_l3_oapi_wc.c -> frame/3/bli_l3_oapi_ex.c frame/3/bli_l3_tapi_woc.c -> frame/3/bli_l3_tapi_ba.c frame/3/bli_l3_tapi_wc.c -> frame/3/bli_l3_tapi_ex.c Here, the "ba" is for "basic" and "ex" is for "expert". This new naming scheme will make more sense especially if/when additional expert parameters are added to the expert APIs (typed and object). commit e88aedae735dfeb6fa5ac28d4527eb3ca58c6510 Author: Field G. Van Zee Date: Fri Jul 6 19:14:02 2018 -0500 Separated expert, non-expert typed APIs. Details: - Split existing typed APIs into two subsets of interfaces: one for use with expert parameters, such as the cntx_t*, and one without. This separation was already in place for the object APIs, and after this commit the typed and object APIs will have similar expert and non- expert APIs. The expert functions will be suffixed with "_ex" just as is the case for expert interfaces in the object APIs. - Updated internal invocations of typed APIs (functions such as bli_?setm() and bli_?scalv()) throughout BLIS to reflect use of the new explictly expert APIs. - Updated example code in examples/tapi to reflect the existence (and usage) of non-expert APIs. - Bumped the major soname version number in 'so_version'. While code compiled against a previous version/commit will likely still work (since the old typed function symbol names still exist in the new API, just with one less function argument) the semantics of the function have changed if the cntx_t* parameter the application passes in is non-NULL. For example, calling bli_daxpyv() with a non-NULL context does not behave the same way now as it did before; before, the context would be used in the computation, and now the context would be ignored since the interace for that function no longer expects a context argument. commit 331694e52414c0cd50048daf880a9ace9e29b94a Author: Isuru Fernando Date: Fri Jul 6 09:07:38 2018 -0600 Fix windows build and enable x86_64 on appveyor (#230) * Upload artifacts built on appveyor (#228) * Upload artifacts * Fix install in appveyor * Remove windows.h in bli_winsys.c (#229) Looks like it is unneeded. * Implemented ARG_MAX hack in configure, Makefile. Details: - Added support for --enable-arg-max-hack to configure, which will change the behavior of make when building BLIS so that rather than invoke the archiver/linker with all of the object files as command line arguments, those object files are echoed to a temporary file and then the archiver/linker is fed that temporary file via the @ notation. An example of this can be found in the GNU make docs at https://www.gnu.org/software/make/manual/make.html#File-Function - Thanks to Isuru Fernando for prompting this feature. * Enable x86_64 and arg-max-hack on appveyor * Use gas style assembly for clang on windows commit a64a780d28c99d35f237f59212772e9beff35b3e Merge: 89e178ce 3cb396d1 Author: Devin Matthews Date: Fri Jul 6 09:38:42 2018 -0500 Merge pull request #231 from flame/travis-pr Disable SDE for PRs commit 3cb396d1ae4ee569f862db201c6a976712fd128e Author: Devin Matthews Date: Fri Jul 6 09:19:44 2018 -0500 Disable SDE for PRs Pull requests cannot use Travis secret variables, so SDE needs to be disabled. This PR should suffice as a test. commit 2c7960c8416ee9b67364be5f2b210fd7a0aec4b5 Author: Field G. Van Zee Date: Thu Jul 5 14:38:33 2018 -0500 Implemented ARG_MAX hack in configure, Makefile. Details: - Added support for --enable-arg-max-hack to configure, which will change the behavior of make when building BLIS so that rather than invoke the archiver/linker with all of the object files as command line arguments, those object files are echoed to a temporary file and then the archiver/linker is fed that temporary file via the @ notation. An example of this can be found in the GNU make docs at https://www.gnu.org/software/make/manual/make.html#File-Function - Thanks to Isuru Fernando for prompting this feature. commit c422a5cd191d47e6aeb9cea6de0e348f46e3e318 Merge: b6470262 89e178ce Author: Field G. Van Zee Date: Thu Jul 5 12:33:35 2018 -0500 Merge branch 'dev' commit b6470262ea66c0f48a5b4d85ca4bf85c1fb2b3af Author: Isuru Fernando Date: Wed Jul 4 19:14:29 2018 -0600 Remove windows.h in bli_winsys.c (#229) Looks like it is unneeded. commit eac4bdf98691c5ec784af0dc11d1ad2269840661 Author: Isuru Fernando Date: Wed Jul 4 18:31:01 2018 -0600 Upload artifacts built on appveyor (#228) * Upload artifacts * Fix install in appveyor commit 89e178ce380439dea951925e33703dc4b979e914 Merge: d868eb3e e32b2ef9 Author: Field G. Van Zee Date: Wed Jul 4 17:51:16 2018 -0500 Merge branch 'master' into dev commit e32b2ef983ea1c3521dd3821116c0078690f125e Author: Field G. Van Zee Date: Wed Jul 4 17:49:39 2018 -0500 Update to CREDITS file. commit 14648e137696484e0ff04f89b16c6b4183ea42b8 Author: Isuru Fernando Date: Wed Jul 4 16:48:42 2018 -0600 Native windows support using clang (#227) * Add appveyor file * Build script * Remove fPIC for now * copy as * set CC and CXX * Change the order of immintrin.h * Fix testsuite header * Move testsuite defs to .c * Fix appveyor file * Remove fPIC again and fix strerror_r missing bug * Remove appveyor script * cd to blis directory * Fix sleep implementation * Add f2c_types_win.h * Fix f2c compilation * Remove rdp and rename appveyor.yml * Remove setenv declaration in test header * set CPICFLAGS to empty * Fix another immintrin.h issue * Escape CFLAGS and LDFLAGS * Fix more ?mmintrin.h issues * Build x86_64 in appveyor * override LIBM LIBPTHREAD AR AS * override pthreads in configure * Move windows definitions to bli_winsys.h * Fix LIBPTHREAD default value * Build intel64 in appveyor for now commit b45ea92fc6f77f2313b50dbe95922f838cbead07 Author: Field G. Van Zee Date: Tue Jul 3 18:27:29 2018 -0500 Added typed (BLAS-like) API code examples. Details: - Added new example code to examples/tapi demonstrating how to use the BLIS typed API. These code examples directly mirror the corresponding example code files in examples/oapi. This setup provides a convenient opportunity for newcomers to BLIS to compare and contrast the typed and object APIs when they are used to perform the same tasks. - Minor cleanups to examples/oapi. commit d868eb3e200f657a1284c4cc933e7a4d25260dce Author: Field G. Van Zee Date: Fri Jun 29 12:36:04 2018 -0500 Implemented bli_obj_scalar_cast_to(). Details: - Implemented bli_obj_scalar_cast_to(), which will typecast the value in the internal scalar of an obj_t to a specified datatype. - Changed bli_obj_scalar_attach() so that the scalar value being attached is first typecast to the storage datatype of the destination object rather than the target datatype. - Reformatted function type signatures in bli_obj_scalar.c as well as prototypes in its corresponding header file. commit 52d80b5f09517d80ac8a7c96983a576c1ec2080b Author: Field G. Van Zee Date: Fri Jun 29 12:30:44 2018 -0500 Fixed static funcs related to target and exec dts. Details: - Fixed incorrect bit shifts in the following static functions: bli_obj_set_target_domain() bli_obj_set_target_prec() bli_obj_set_exec_domain() bli_obj_set_exec_prec() - Fixed incorrect bitmask in bli_dt_proj_to_single_prec(). - Updated bli_obj_real_part() and bli_obj_imag_part() so that it updates the target and exec datatypes (in addition to the storage datatypes). commit e006f2d0eeb229c1cd05a424496a774c29bdc5d7 Merge: bd8c55fe dafca7a0 Author: Field G. Van Zee Date: Wed Jun 27 15:54:38 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit bd8c55fe268e8e352508341ebd739ef4fc68eb92 Author: Field G. Van Zee Date: Wed Jun 27 15:52:37 2018 -0500 Added dt_on_output field to auxinfo_t. Details: - Added a new field to the auxinfo_t struct that can be used, in theory, to request type conversion before the microkernel stores/accumulates its microtile back to memory. - Added the appropriate get/set static functions to bli_type_defs.h. commit dafca7a0c2c72aaf15cb588b2bef6f246abb1905 Author: Devin Matthews Date: Mon Jun 25 16:20:10 2018 -0500 Fix botched memory addressing in Penryn kernel (no effect for GAS output). commit de493b0f349efebab98ab17f063d4d3d932c24c3 Merge: 195480be a7166feb Author: Devin Matthews Date: Mon Jun 25 14:26:06 2018 -0500 Merge pull request #226 from devinamatthews/dev Finish macroization of assembly ukernels. commit 195480beb589db7d582646f556e855c611d4c3a9 Merge: 07c3d0a9 3f387ca3 Author: Field G. Van Zee Date: Mon Jun 25 13:24:21 2018 -0500 Merge branch 'master' into dev commit 3f387ca35e42519f0d6a154814e4c8800fa2acb8 Author: Field G. Van Zee Date: Mon Jun 25 12:32:03 2018 -0500 Fixed bugs in configure's select_cc() function. Details: - This commit fixes several bugs in configure relating to selecting a C compiler. By dumb luck, two of the two bugs sort of cancelled each other out in most use cases, which manifested as the expected behavior. Thanks to Mathieu Poumeyrol for bringing this issue to our attention, and to Devin Matthews for suggesting the more portable way of capturing both stdout and stderr and suggesting a return code check instead of testing stdout/stderr. - The first bug: As the values of the compiler search list are iterated over, only stderr is captured when querying a compiler with --version rather than both stdout and stderr. - The second bug: After each query, a conditional attempted to test whether the query resulted in anything being output. That conditional erroneously was using "-z" instead of "-n" for non-emptiness. Thus, most of the time, stderr was empty (because the --version info was being output on stdout), and since it was empty, the -z conditional (intended to execute only when a compiler was found to be responsive) executed. - A third bug was also fixed in the way that the merged stdout/stderr output was tested for non-emptiness (moving the 'cat' invocation to another line and testing the contents of a variable instead). - The three bugs above have been fixed as part of a partial rewrite of the select_cc() function in terms of a return code check, which obviated the need to save the output of stdout and stderr. - The fourth bug involved a misnamed variable in the right-hand side of a statement intended to prepend CC to search_list when CC was non-empty. This typically did not manifest as a bug since usually CC (if it was set) was set to a value that was known to work. commit a7166feb1053814b7dd27f3879ae38acfc9637fc Author: Devin Matthews Date: Mon Jun 25 12:09:18 2018 -0500 Finish macroization of assembly ukernels. commit f986396c2af5de06283b9834112782afd0a8907e Author: Field G. Van Zee Date: Fri Jun 22 18:12:40 2018 -0500 Added 'configure --help' text for CFLAGS, LDFLAGS. Details: - Added mention of the new support for preset CFLAGS, LDFLAGS to the bottom of the text output by './configure --help'. - Updated usage example to use 'haswell' instead of 'sandybridge'. commit 884175d9ffb62e49535e6c1f7d58fb3b83e7e78f Author: Field G. Van Zee Date: Fri Jun 22 18:08:43 2018 -0500 Added configure support for preset CFLAGS, LDFLAGS. Details: - Any preexisting values set to the CFLAGS environment variable (or the CFLAGS variable if given on the command line) are saved by configure for later inclusion (prepending, to be precise) along with the compiler flags automatically determined by the BLIS build system. LDFLAGS is treated in a similar manner.) Thanks to Dave Love for requesting this feature in issue #223 and Mathieu Poumeyrol for his support on this and a previous related issue. - Comment updates to build/config.mk.in. - Strip whitespace from return value of various cflags functions in common.mk. commit 07c3d0a95190bd23f0cd2ef220deb3384d8378d1 Author: Field G. Van Zee Date: Thu Jun 21 12:35:07 2018 -0500 Update to CREDITS file. commit a1ebbbf158c7b34c9032ef45431bc610b6f14858 Merge: 17928b1c c81c6f23 Author: Devin Matthews Date: Wed Jun 20 15:37:53 2018 -0500 Merge pull request #224 from devinamatthews/asm-macros Asm macros commit c81c6f23b9547b5d55ae68fd5a3bbd8a78290b6b Author: Devin Matthews Date: Wed Jun 20 15:20:44 2018 -0500 Fix problem with inc and dec macros. commit 5a63971c822fd452f97ba869625c8e87f6cbeebc Merge: b4d94e54 17928b1c Author: Devin Matthews Date: Wed Jun 20 14:07:49 2018 -0500 Merge remote-tracking branch 'upstream/dev' into asm-macros commit b4d94e54d44cf30e4bb452ca5263be3473c0582d Author: Devin Matthews Date: Wed Jun 20 14:07:24 2018 -0500 Convert x86 microkernels to assembly macros. commit 17928b1c9941aa58aef1f122c793e2b14e705267 Author: Field G. Van Zee Date: Tue Jun 19 17:59:03 2018 -0500 Added static funcs bli_dt_domain(), bli_dt_prec(). Details: - Added definitions of static functions bli_dt_domain()/bli_dt_prec(), which extract a dom_t domain or prec_t precision value, respectively, from a num_t datatype. - Changed the return types of bli_obj_domain() and bli_obj_prec() from objbits_t to dom_t and prec_t. (Not sure why they were ever set to return objbits_t.) commit 5f7fbb7115b1bf532c169dfd9adef84c41a95031 Author: Field G. Van Zee Date: Tue Jun 19 15:38:55 2018 -0500 Static funcs for projecting dt to single/double. Details: - Added static functions for projecting a datatype to single precision or double precision, both for obj_t's storage datatypes and standalone datatypes. commit d4a22702c7a90273dc14f271db465c2e11e5b87e Author: Field G. Van Zee Date: Tue Jun 19 14:54:57 2018 -0500 Set up haswell config for optional col-pref ukrs. Details: - Added two presently-disabled cpp blocks in bli_cntx_init_haswell.c to easily allow one to switch to a set of column-preferential gemm microkernels (in the haswell subconfiguration). The second column- preferring block sets the the register blocksizes to their appropriate values. However, cache blocksizes are left unchanged, and therefore are likely suboptimal. This should be addressed later. commit f317c2e31bfc329cb6bb4e06005e45b9c8a9d6a7 Author: Field G. Van Zee Date: Tue Jun 19 12:21:23 2018 -0500 Added get/set static funcs for exec dt/dom/prec. Details: - Added functions to bli_obj_macro_defs.h to get and set the target domain and target precision bits in the obj_t, and also added the appropriate support in bli_type_defs.h. commit e88a5b8da8c26caebd2b0fb73b30836fb5417c9c Author: Field G. Van Zee Date: Mon Jun 18 15:56:26 2018 -0500 Implemented castm, castv operations. Details: - Implemented castm and castv operations, which behave like copym and copyv except where the obj_t operands can be of different datatypes. These new operations, however, unlike copym/copyv, do not build upon existing level-1v kernels. - Reorganized projm, projv into a 'proj' subdirectory of frame/base (to match the newly added frame/base/cast directory). - Added new macros to bli_gentfunc_macro_defs.h, _gentprot_macro_defs.h that insert GENTFUNC2/GENTPROT2 macros for all non-homogeneous datatype combinations. Previously, one had to invoke two additional macros--one which mixed domains only and another that included all remaining cases--in order to get full type combination coverage. - Defined a new static function, bli_set_dims_incs_2m(), to aid in the setting of various variables in the implementations of bli_??castm(). This static function joins others like it in bli_param_macro_defs.h. - Comment update to bli_copysc.h. commit 2000cdff59272974438e88e0e82d8e1a32710325 Author: Field G. Van Zee Date: Mon Jun 18 14:17:28 2018 -0500 Update to CREDITS file. commit ed2c8aed848ba2dede18df090cf2e0b6e4cc059f Author: Field G. Van Zee Date: Mon Jun 18 11:49:34 2018 -0500 Temporarily disabled small matrix handling on zen. Details: - Disabled small matrix handling in config/zen/bli_family_zen.h due to what appears to be a bug that manifests as failures in the single and double precision real level-3 BLAS test drivers (visible via out.sblat3 and out.dblat3). Thanks to Robin Christ for reporting this issue. commit ed20392c500940bfc0947795c1ff7c8c24f8e26f Author: Field G. Van Zee Date: Fri Jun 15 16:31:22 2018 -0500 Added get/set static funcs for exec dt/dom/prec. Details: - Added functions to bli_obj_macro_defs.h to get and set the execution domain and execution precision bits in the obj_t. - Added/rearranged a few functions in bli_obj_macro_defs.h. - Renamed some macros in bli_type_defs.h: EXECUTION -> EXEC. commit 22594e8e9ab55f5bc0e69d96a23e128502849999 Author: Field G. Van Zee Date: Thu Jun 14 17:35:23 2018 -0500 Updated sandbox/ref99 according to f97a86f. Details: - Applied changes to ref99 sandbox analagous to those applied to framework code in f97a86f. This involves setting the pack schemas of A and B objects temporarily to communicate those desired schemas to the control tree creation function in blx_gemm_cntl.c. This allows us to (henceforth) query the schemas from the control tree rather than the context. commit 1b5d0424d2c7e5eac33e02359c12917ef280949f Author: Field G. Van Zee Date: Wed Jun 13 18:41:32 2018 -0500 Prototype column-preferential zen gemm ukernels. Details: - Added prototypes to bli_kernels_zen.h for each of the four gemm microkernels that prefer outputting to column storage. commit f88c2e7a539e383297e846e6d4647058dd3db128 Author: Field G. Van Zee Date: Wed Jun 13 18:27:46 2018 -0500 Defined static function bli_blksz_scale_def_max(). Details: - Added a new static function to bli_blksz.h that scales both the default (regular) blocksize as well as the maximum blocksize in the blksz_t object. Reminder: maximum blocksizes have different meanings in different contexts. For register blocksizes, they refer to the packing register blocksizes (PACKMR or PACKNR) while for cache blocksizes, they refer to the maximum blocksize to use during the final iteration of a loop. commit 87db5c048e0c7f37351fda486abaf7d19fc5821c Author: Field G. Van Zee Date: Tue Jun 12 19:38:37 2018 -0500 Changed usage of virtual microkernel slots in cntx. Details: - Changed the way virtual microkernels are handled in the context. Previously, there were query routines such as bli_cntx_get_l3_ukr_dt() which returned the native ukernel for a datatype if the method was equal to BLIS_NAT, or the virtual ukernel for that datatype if the method was some other value. Going forward, the context native and virtual ukernel slots will both be initialized to native ukernel function pointers for native execution, and for non-native execution the virtual ukernel pointer will be something else. This allows us to always query the virtual ukernel slot (from within, say, the macrokernel) without needing any logic in the query routine to decide which function pointer (native or virtual) to return. (Essentially, the logic has been shifted to init-time instead of compute-time.) This scheme will also allow generalized virtual ukernels as a way to insert extra logic in between the macrokernel and the native microkernel. - Initialize native contexts (in bli_cntx_ref.c) with native ukernel function addresses stored to the virtual ukernel slots pursuant to the above policy change. - Renamed all static functions that were native/virtual-ambiguous, such as bli_cntx_get_l3_ukr_dt() or bli_cntx_l3_ukr_prefers_cols_dt() pursuant to the above polilcy change. Those routines now use the substring "get_l3_vir_ukr" in their name instead of "get_l3_ukr". All of these functions were static functions defined in bli_cntx.h, and most uses were in level-3 front-ends and macrokernels. - Deprecated anti_pref bool_t in context, along with related functions such as bli_cntx_l3_ukr_eff_dislikes_storage_of(), now that 1m's panel-block execution is disabled. commit dbaf440540837b03643190cd685ed889fa7fd212 Merge: 22aa44eb 2610fff0 Author: Field G. Van Zee Date: Mon Jun 11 12:37:04 2018 -0500 Merge branch 'master' into dev commit 2610fff0b07bdb345cb2e334ef6bea0c63c8cead Author: Field G. Van Zee Date: Mon Jun 11 12:32:54 2018 -0500 Renamed 1m packm kernels from _1e to _1er. Details: - Renamed the reference packm kernels used by 1m. Previously, they used a _1e suffix, which was confusing since they packed to both 1e and 1r schemas. This was likely an artifact of the time when there were separate kernels for each schema before I decided to combine them into a single function (per datatype and panel dimension), and the 1e functions were the ones to inherit the 1r functionality. The kernels have now been renamed to use a _1er suffix. commit 7af5283dcc3dded114852d6013d33134021b81aa Author: sraut Date: Mon Jun 11 15:00:22 2018 +0530 added check condition on n-dimension for XA'=B intrinsic code to process till 128 size Change-Id: I95d020a5ca3ea21d446b8c2e379d56e1eea18530 commit 712de9b371a8727682352a2f52cd4880de905f0b Author: Field G. Van Zee Date: Sat Jun 9 14:36:30 2018 -0500 Added missing semicolon in 03obj_view.c Details: - Thanks to Tony Skjellum for pointing out this typo due to a last-minute change to the source prior to committing. commit 043d0cd37ef4a27b1901eeb89d40083cfb2a57ba Author: Field G. Van Zee Date: Sat Jun 9 13:46:49 2018 -0500 Implemented bli_acquire_mpart(), added example code. Details: - Implemented bli_acquire_mpart(), a general-purpose submatrix view function that will alias an obj_t to be a submatrix "view" of an existing obj_t. - Renumbered examples in examples/oapi and inserted a new example file, 03obj_view.c, which shows how to use bli_acquire_mpart() to obtain submatrix views of existing objects, which can then be used to indirectly modify the parent object. commit f1908d39767baef56077def69126d96f805ee27e Author: Field G. Van Zee Date: Fri Jun 8 14:22:22 2018 -0500 Fixed broken input.operations.fast. Details: - Removed three input lines from input.operations.fast (labeled "test sequential micro-kernel") that I intended to remove in bd02c4e. These lines prevented 'make check' (and 'make checkblis-fast') from completing correctly. Note: This bug was fixed in 3df39b3, but that commit has not yet been merged into master, hence this redundant commit. Thanks to Robert van de Geijn for reporting this issue. commit 262a62e3482c5caa947a89cabb562b5887555bd6 Author: Field G. Van Zee Date: Fri Jun 8 12:10:54 2018 -0500 Fixed undefined ref in steamroller/excavator configs. Details: - Fixed erroneous calls to bli_cntx_init_piledriver_ref() in bli_cntx_init_steamroller() and bli_cntx_init_excavator(), which should have been to their respectively-named bli_cntx_init_*() functions instead. Thanks to qnerd for bringing these bugs to our attention. commit 22aa44ebec2c7884bdc944775a1aa7534ab53f0d Merge: 65fae950 b65d0b84 Author: Field G. Van Zee Date: Thu Jun 7 17:42:59 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 65fae95074d239354737355bbe6f202d4f8b2871 Author: Field G. Van Zee Date: Thu Jun 7 17:41:09 2018 -0500 Implemented bli_setrm, _setim, _setrv, _setiv. Details: - Defined new wrappers to setm/setv operations in frame/base/bli_setri.c that will target only the real or only the imaginary parts of a matrix/vector object. - Updated bli_obj_real_part() so that the complex-specific portions of the function are not executed if the object is real. - Defined bli_obj_imag_part(). - Caveat: If bli_obj_imag_part() is called on a real object, it does nothing, leaving the destination object untouched. The caller must take care to only call the function on complex objects. - Reordered some of the static functions in bli_obj_macro_defs.h related to aliasing. commit b65d0b841b7e4357bc2cf743bbb03384a3ab0bfa Author: Field G. Van Zee Date: Thu Jun 7 14:38:41 2018 -0500 Fixed bug in bli_dt_proj_to_complex(). Details: - Fixed a bug identical to the one fixed in 0a4a27e, except this time in the bli_obj_param_defs.h header file. It looks like the only consumers of this static function were in bli_l0_oapi.c, and so this may not have been manifesting (yet). commit 55b6abdf7458e31df3ad01796d67c2332c776948 Author: Field G. Van Zee Date: Thu Jun 7 14:08:12 2018 -0500 Enforce consistent datatypes in most object APIs. Details: - Added logic to level-1v, -1d, -1f, -1m, -2, and -3 operations' _check() functions to ensure that all operands are of the same datatype. There are some exceptions that were left out, such as the _check() function for the various norm operations since they have a different idea of datatype consistency (ie: the norm object must be the real projection of the primary input vector/matrix object). commit 513138b1a1ecebd015580423c779810cae5c67f2 Author: Field G. Van Zee Date: Thu Jun 7 12:24:47 2018 -0500 Defined/implemented bli_projv(). Details: - Added an implementation for bli_projv() to go along with the implementation of bli_projm() added in 0a4a27e. The only difference between the two is that bli_projv() may only be used on vectors, whereas bli_projm() is general-purpose. - Added a _check() function corresponding to bli_projv(). commit 5f71c1e719eb482b2a4e40daa280c4f7d05b6963 Merge: b5a641e9 3df39b37 Author: Field G. Van Zee Date: Wed Jun 6 19:06:14 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit b5a641e968469805906eb2c971384d12ad1beac5 Author: Field G. Van Zee Date: Wed Jun 6 19:05:37 2018 -0500 Added char-to-dt and dt-to-char mapping functions. Details: - Defined additional functions in bli_param_map.c: bli_param_map_char_to_blis_dt() bli_param_map_blis_to_char_dt() which will map a char to its corresponding num_t, or vice versa. commit 0a4a27e1a4487480410bc0b1bb034bcf97583214 Author: Field G. Van Zee Date: Wed Jun 6 19:02:29 2018 -0500 Defined/implemented bli_projm(). Details: - Defined a new operation in frame/base/bli_proj.c, bli_projm(), which behaves like bli_copym(), except that operands a and b are allowed to contain data of differing domains (e.g. a is real while b is complex, or vice versa). The file is named bli_proj.c, rather than bli_projm.c, with the intention that a 'v' vector version of the function may be added to the same file (at some point in the future). - Added supporting bli_check_*() functions in bli_check.c to confirm consistent precisions between to datatypes/objects, as well as the appropriate error message in bli_error.c and a new error code in bli_type_defs.h. - Wrote a bli_projm_check() function to go along with bli_projm(). - Defined static function bli_obj_real_part() in bli_obj_macro_defs.h, which will initialize an obj_t alias to the real part of the source object. - Fixed a bug in the static function bli_dt_proj_to_complex(), found in bli_param_macro_defs.h. Thankfully, there were no calls to the function to produce buggy behavior. commit 3df39b37a0134befa34b6b6259db98467c7bc965 Author: Field G. Van Zee Date: Wed Jun 6 15:35:05 2018 -0500 Fixed recently broken input.operations.fast. Details: - Removed "test sequential front-end" lines from microkernel test entries of input.operations.fast. This change was meant for inclusion in bd02c4e but was missed due to slightly different wording of the comment (I used "sed //d" to remove the lines). This fixes the broken 'make checkblis-fast' (and 'make check') targets. commit 695cd520e2f5eab938f66afe9fe36201ab2700c5 Author: sraut Date: Wed Jun 6 11:48:56 2018 +0530 AMD Copyright information changed to 2018 Change-Id: Idfd11afd5d252f8063d0158680d24bf7e2854469 commit df1dd24fd896821de60917b429f303bab7fd0d4b Author: sraut Date: Wed Jun 6 11:24:33 2018 +0530 small matrix trsm intrinsics optimization code for AX=B and XA'=B Change-Id: I90123c4d9adbd314c867995cd19dc975150b448c commit 3f48c38164b4135515b5c752c506fdccc4480be2 Author: Field G. Van Zee Date: Tue Jun 5 16:52:35 2018 -0500 Cosmetic fix to configure output in config.mk. Details: - Fixed configure so that MK_ENABLE_MEMKIND is assigned "no" when the option is disabled due to libmemkind not being present. This wasn't affecting anything since the one use of the variable (in common.mk) was formulated as "ifeq ($(MK_ENABLE_MEMKIND),yes)". That is, the variable being empty was effectively equivalent to it being set to "no". - Comment updates to build/config.mk.in, common.mk. commit 5df201260f64aa98a365931f6d2da70144d69932 Merge: 1b9af85e 96d2774b Author: Field G. Van Zee Date: Tue Jun 5 16:14:19 2018 -0500 Merge branch 'master' into dev commit 1b9af85ec98d91bb2b27aadaa3df344d18faff35 Author: Field G. Van Zee Date: Tue Jun 5 16:07:13 2018 -0500 Updated ref99 call to _cntx_set_thrloop_from_env(). Details: - Reordered the arguments in the ref99 sandbox's call to bli_cntx_set_thrloop_from_env() to be consistent with the updated function signature from f97a86f. Thanks to Devangi Parikh for reporting this issue. commit 96d2774b4cb44ff1e8b5798d7cfc83154a607624 Author: Tyler Michael Smith Date: Tue Jun 5 14:17:39 2018 +0200 Make bli_auxinfo_next_b() return b_next, not a_next (#216) commit d4c24ea5f644eb635046e7fe249d3e8e58b4c98a Author: sraut Date: Tue Jun 5 15:42:59 2018 +0530 copyright message changed to 2018 Change-Id: I33c1ebda41bc7f1973ff19e3b1947bdad62b4d44 commit 3f1ba4e646776699ebfaa042fe24691d9e2f55d0 Author: sraut Date: Tue Jun 5 14:21:13 2018 +0530 copyright changed to 2018 Change-Id: Ie916c7cd6f95aedc3cab6eec3a703c9ddb333bc3 commit bd02c4e9f7fe07487276e61507335d48c8e05f35 Author: Field G. Van Zee Date: Mon Jun 4 13:42:17 2018 -0500 Cleanups to testsuite, input.operations format. Details: - Removed the line in each operation entry in input.operations titled "test sequential front-end" and the corresponding support for the lines in the testsuite input parsing code. This line was included in the some of the earliest versions of the testsuite, back when I intended to eventually have separate multithreaded APIs. Specifically, I envisioned that multithreaded and sequential testing could be enabled or disabled on an operation level. However, BLIS evolved in a different direction and still does not have multithreaded-specific APIs (even if it will eventually someday). But even if it did have such APIs, I doubt I would allow the user to enable/disable them on an operation level. Thus, this was a zombie future parameter that was never used and never made sense to begin with. The one instance of the front_seq variable, used in the various libblis_test_() functions to guard the call to the operation test driver, that remains was commented out instead of deleted so that someday it could be easily changed via sed, if desired. - Various minor cleanups to the testsuite code, including consolidating use of DISABLE and DISABLE_ALL and reexpressing certain conditional expressions in the libblis_test_() functions in terms of boolean functions. commit 2c6d99b99e50d70f904da298a0c59be16cc5c180 Author: Field G. Van Zee Date: Sun Jun 3 18:13:36 2018 -0500 Fixed names out of alphabetical order in CREDITS. commit 7a207e8f2c5046f8b295a78e029ff2de765c7409 Author: Field G. Van Zee Date: Sun Jun 3 18:04:27 2018 -0500 Disabled indirect blacklisting (issue #214). Details: - Return early from function, pass_config_kernel_registries(), that implements indirect blacklisting of subconfigurations (during pass 0). In short, I realized that indirect blacklisting is not needed in the situations I envisioned, and can actually cause problems under certain circumstances. Thanks to Tony Skjellum for reporting the issue (#214) that led to this commit, and to Devin Matthews for prompting me to realize that indirect blacklisting was unnecessary, at least as originally envisioned. commit d7fb32682057c7458c8891c0eedafc374fd9beef Author: Field G. Van Zee Date: Sun Jun 3 13:20:37 2018 -0500 Fixed syntax artifacts from 4b36e85 in examples. Details: - Fixed artifacts of malformed recursive sed expressions used when preparing 4b36e85, in which most function-like macros were converted to static functions. The syntactically defective code was contained entirely in examples/oapi. Thanks to Tony Skjellum for reporting this issue. - Update to CREDITS file. commit ed7dedfd4a07eefeb5a038f9899afb8053b45383 Merge: f97a86f3 469727d4 Author: Field G. Van Zee Date: Sat Jun 2 20:29:53 2018 -0500 Merge branch 'master' into dev commit f97a86f322a6e3e31f33c89befc66189b0b8c64f Author: Field G. Van Zee Date: Sat Jun 2 20:28:20 2018 -0500 Updated setting/querying pack schema (cntx->cntl). - Query pack schemas in level-3 bli_*_front() functions and store those values in the schema bitfields of the correponding obj_t's when the cntx's method is not BLIS_NAT. (When method is BLIS_NAT, the default native schemas are stored to the obj_t's.) - In bli_l3_cntl_create_if(), query the schemas stored to the obj_t's in bli_*_front(), clear the schema bitfields, and pass the queried values into bli_gemm_cntl_create() and bli_trsm_cntl_create(). - Updated APIs for bli_gemm_cntl_create() and bli_trsm_cntl_create() to take schemas for A and B, and use these values to initialize the appropriate control tree nodes. (Also cpp-disabled the panel-block cntl tree creation variant, bli_gemmpb_cntl_create(), as it has not been employed by BLIS in quite some time.) - Simplified querying of schema in bli_packm_init() thanks to above changes. - Updated openmp and pthreads definitions of bli_l3_thread_decorator() so that thread-local aliases of matrix operands are guaranteed, even if aliasing is disabled within the internal back-end functions (e.g. bli_gemm_int.c). Also added a comment to bli_thrcomm_single.c explaining why the extra aliasing is not needed there. - Change bli_gemm() and level-3 friends so that the operation's ind() function is called only if all matrix operands have the same datatype, and only if that datatype is complex. The former condition is needed in preparation for work related to mixed domain operands, while the latter helps with readability, especially for those who don't want to venture into frame/ind. - Reshuffled arguments in bli_cntx_set_thrloop_from_env() to be consistent with BLIS calling conventions (modified argument(s) are last), and updated all invocations in the level-3 _front() functions. - Comment updates to bli_cntx_set_thrloop_from_env(). commit 965db85d29977d228ea744581edf2b682eb8e8a8 Author: Field G. Van Zee Date: Fri Jun 1 12:32:15 2018 -0500 Updated macro invocations in bli_gemm_ker_var2.c. Details: - Updated "get next a/b micropanel" macro invocations in bli_gemm_ker_var2.c according to changes in 9588625. - Comment update in bli_cntx.c. commit 8749fa0b48a7710f4115023e2c46bc80167bc8f9 Author: Field G. Van Zee Date: Thu May 31 12:34:01 2018 -0500 Cleanups to ref99/README.md, test/3m4m/Makefile. Details: - Minor edits to sandbox/ref99/README.md. - Removed cpp guards in sandbox/ref99/thread/blx_gemm_thread.h to be consistent with other headers in sandbox/ref99. - Additional targets and related cleanups in test/3m4m/Makefile. commit 9588625c43c86ef1bde8140f620a30f52420e6a6 Author: Field G. Van Zee Date: Wed May 30 15:19:53 2018 -0500 Renamed "next micropanel" macros in _l3_thrinfo.h. Details: - Renamed several macros defined in bli_l3_thrinfo.h designed to compute the values of a_next and b_next to insert into an auxinfo_t struct in level-3 macrokernels. (Previously, the macros did not use a bli_ prefix.) - Updated instances of above macro usage within various macrokernels. commit e4420591225fca2f63ca74ef6a23b962fcd4bec0 Merge: 34f974d1 850a8a46 Author: Field G. Van Zee Date: Tue May 29 17:12:22 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 34f974d1a83a7d29ba09f67e392d361231fdf99c Author: Field G. Van Zee Date: Tue May 29 17:11:52 2018 -0500 More tweaks/updates to sandbox/ref99/README.md. commit 850a8a46c0a569a2652d8c200e5c53b61bcf988d Author: Devin Matthews Date: Tue May 29 13:51:21 2018 -0500 Test all x86_64 configurations*... (#212) * Add custom SDE cpuid files. * Set up testing of all x86_64 architectures (except bulldozer) using SDE. * Update .travis.yml [ci skip] * Update do_testsuite.sh [ci skip] * Updated .travis.yml with my secret token. Details: - Replaced Devin's temporary secret token with my own, which is used by Travis when accessing the Intel SDE via Dropbox. * Work around CPUID dispatch in glibc/libm by patching ld.so. * Detect path of loader at runtime. * Attempt to make SDE run on Travis * Allow unpatched ld.so if we don't know how to patch it. I *think* this only happens for older glibc without the multi-arch stuff (e.g. Ubuntu 14.04 on Travis), but who knows? * Upgrade Travis to gcc-6 and binutils-2.26. * Try to get Travis to use the right assembler. * Apparently you need ld-2.26 too. * Try to also patch ld.so from Ubuntu 14.04. * Take the nuclear option. * Account for non-absolute dependencies in ldd output. * String manipulation fail. * Update patch-ld-so.py * Add Zen to SDE testing. * Removed dead variable from travis/do_testsuite.sh. Details: - Removed 'BLIS_ENABLE_TEST_OUTPUT=yes' from make invocations in travis/do_testsuite.sh. This variable is no longer present in the BLIS build system (if it ever was?), and therefore has no effect. commit 42ea02a34e5c144893fe239ae55daef895d92677 Author: Field G. Van Zee Date: Tue May 29 12:48:14 2018 -0500 Renamed c99 sandbox to ref99. Details: - Renamed sandbox/c99 to sandbox/ref99. I wanted to name the sandbox so that it would be thought of as a "reference" sandbox. I kept the "99" to differientiate it from future reference sandboxes that may be written in another language (such as C++). - Updates to sandbox/ref99/README.md. commit 0e7205ccef50dccd4306cf427a63633396472813 Author: Field G. Van Zee Date: Tue May 29 12:36:13 2018 -0500 Remove sandbox/.gitkeep now that dir is non-empty. commit 3a4603858e3819cbd6ed7dd67d0fc0b3f89ed254 Author: Field G. Van Zee Date: Sat May 26 15:51:08 2018 -0500 More README.md updates to sandbox/c99. Details: - Added a section that walks the reader through how to configure BLIS to use a gemm sandbox. commit 2bad97f6bdf4642884d60fc03970549902a54d74 Author: Field G. Van Zee Date: Sat May 26 15:31:16 2018 -0500 Updates to CREDITS, sandbox/c99/README.md. commit 2b4a447526effa3e847a7e5c15c3758573f12318 Author: Field G. Van Zee Date: Fri May 25 18:51:23 2018 -0500 Initial implementation of c99 "reference" sandbox. Details: - Added a c99 sandbox (in sandbox/c99) to serve as a starting point for others looking to experiment with alternative implementations of gemm in BLIS. Note that this sandbox implementation is a first draft and will be refined over time. - Minor updates to Makefile and common.mk to restrict what source files get recompiled when sandbox files are touched. - Added an initial draft of a README.md in sandbox/c99. commit 469727d4f8a976d8713afb4d0b6235c322498db0 Author: Field G. Van Zee Date: Fri May 25 16:17:13 2018 -0500 Very minor comment updates. commit 66dbe69a0f9359bf1e39b5672ee365213de2e3ee Author: Field G. Van Zee Date: Fri May 25 15:45:53 2018 -0500 Converted macros to static funcs in _packm_cntl.h. Details: - Converted various macros in frame/1m/packm/bli_packm_cntl.h (designed to access fields of a packm_params_t struct) to static functions. commit 22deef2f5463a47e3b3c37fc313d17550f10ee06 Author: Field G. Van Zee Date: Thu May 24 14:28:55 2018 -0500 Support alternative gemm implementation sandboxes. Detail: - configure: - add support for --enable-sandbox=NAME to configure script, where NAME is a subdirectory of a new 'sandbox' directory that contains an alternative implementation of gemm. (For now, only implementations of gemm may be provided via a sandbox.); - add support for C++ compiler. C++ compilers are handled in a manner similar to that of C compilers, in that a default search order is used, and that CXX is searched for first, if the variable is set. In practice, the C++ compiler that is selected should correspond to the selected C compiler. (Example: If gcc is selected for C, g++ should be selected for C++.) The result of the search is output to config.mk via build/config.mk.in. NOTE: The use of C++ in BLIS is still hypothetical, but may eventually move to being experimental. This support was intended only for use of C++ within a gemm sandbox. - build/config.mk.in: - define SANDBOX variable containing sandbox subdirectory name. - build/bli_config.in: - define either of the BLIS_ENABLE_SANDBOX or BLIS_DISABLE_SANDBOX macros in bli_config.h. - common.mk: - include makefile fragments that were propagated into the specified sandbox subdirectory; - generate different CFLAGS for sandboxes, as well as a separate CXXFLAGS variable for sandboxes when C++ source files are compiled; - isolate into a single location lists of file suffixes for various purposes. - reorganized/clean up code related to identifying header files and paths. - Makefile: - generate object filepaths for and compile source code files found in sandbox sub-directory; - remove makefile fragments placed in sandbox sub-directory (cleanmk); - various other cleanups. - Added .cc, .cpp, and .cxx to list of suffixes of files to recognize in makefile fragments (via build/gen-make-frags/suffix_list). - Updated blis.h to conditionally #include bli_sandbox.h (via a new file, bli_sbox.h), which each sandbox is assumed to use for any type definitions and function prototypes it wishes to export out to blis.h. - Conditionally disable bli_gemmnat() implementation in frame/3 when BLIS_ENABLE_SANDBOX is defined. commit 25e3501ed57a0db7f860c88b7199b36049aec12a Merge: 216a4cb9 5140ee34 Author: Field G. Van Zee Date: Thu May 24 13:57:16 2018 -0500 Merge branch 'master' into dev commit 5140ee3424c744981a3fed3b5a748ebbfc111388 Author: Field G. Van Zee Date: Wed May 23 16:56:14 2018 -0500 Updated types of bli_is_[un]aligned_to() functions. Details: - Changed the void* arguments of the following static functions: bli_is_aligned_to() bli_is_unaligned_to() bli_offset_past_alignment() to siz_t, and the return type of bli_offset_past_alignment() from guint_t to siz_t. This allows for more versatile usage of these functions (e.g. when aligning both pointers and leading dimension). - Updated all invocations of these functions, mostly in kernels/penryn but also in kernels/bgq, to include explicit typecasts to siz_t when pointer arguments are passed in. - Thanks to Devin Matthews for pointing out this potential bug (via issue #211). - Deleted a few trailing spaces in various penryn kernels. - Removed duplicate instances of the words "derived" and "THEORY" from various kernel license headers, likely from a malformed recursive sed performed long ago. commit 216a4cb9cb87fa4c93f6ceb6ae90602e5018b305 Author: Field G. Van Zee Date: Fri May 18 18:47:03 2018 -0500 Minor update to flatten-headers.[py|sh] help text. Details: - Fixed a typo and removed some outdated language from the help text of flatten-headers.py and flatten-headers.sh. commit 962a706a6f56ea070ac4683f0af69c7e59af8ecb Author: Field G. Van Zee Date: Fri May 18 18:19:40 2018 -0500 Updated LICENSE file to mention HP Enterprise. Details: - Added HP Enterprise to the LICENSE file. Previously, only the source files touched by HPE contained the corresponding copyright notices. (This oversight was unintentional.) - Updated file-level copyright notices to include a comma, to match the formatting used for UT and AMD copyrights. commit efa43e13effe901ad31e734ac90f027e89473bd9 Author: Field G. Van Zee Date: Fri May 18 12:20:40 2018 -0500 More updates to CREDITS and RELEASING files. commit f94ab97af8e86baf9ee9a9cbaef8bb3712df2e11 Author: Field G. Van Zee Date: Thu May 17 17:45:31 2018 -0500 Update to CREDITS file. commit 4919b10c005e006a6d818eb8f865f9dbd8aa16df Author: Field G. Van Zee Date: Thu May 17 16:38:49 2018 -0500 Minor changes to README.md and CONTRIBUTING.md. commit b89451187e8321b673a1cf7603c8d48028d9d4c8 Author: Field G. Van Zee Date: Thu May 17 16:23:06 2018 -0500 README.md update. Details: - Added "Contributing" section with relevant links. commit af244194e7d76276a1b90fe59f9307dde0429e1d Author: Field G. Van Zee Date: Thu May 17 15:38:02 2018 -0500 Removed explicit critical sec. from bli_memsys.c. Details: - Removed critical sections protecting the initialization/finalization of bli_memsys.c. These synchronization mechanisms are no longer needed now that BLIS initializes all APIs via pthread_once(). commit 10c9e8f95254d8c6436c4d3cb093fa5544b45c90 Author: Field G. Van Zee Date: Thu May 17 15:22:51 2018 -0500 Cache hardware's arch_t id after querying once. Details: - Added logic to bli_arch.c that will call what was previously the body of bli_arch_query_id() only once and then cache the value in a static variable local to the file. (Previously, the arch_t associated with the hardware/configuration was queried every time bli_arch_query_id() was called, which was at least once per level-3 function call. Thanks to Devin Matthews for suggesting this feature via issue #175. - Added -lpthread to the compile/link command line of the compiler invocation that compiles build/detect/config/config_detect.c, which prints the string identifying the detected configuration, since it is now needed due to new pthread_once() logic in bli_arch.c. - Implementation note: I chose to implement this arch_t caching feature via pthread_once(), using a separate pthread_once_t variable local to the file, rather than calling bli_init_once(). The reason is that I did not want to require bli_init() as a prerequisite to this function. bli_init() already calls several sub-components, some of which make use of bli_arch_query_id(), and therefore it would be easy to fall into a circular self-init situation (which usually causes pthreads to hang indefinitely). commit f28a15293890ac6fbceac229fd204dbc9fec6e27 Author: Francisco Igual Date: Thu May 17 09:26:14 2018 +0000 Fixed clobber list bug in ARMv8 ukernel commit 2e31dd7852b4d6a9355899cf9659d4b8130461cb Author: Field G. Van Zee Date: Wed May 16 17:28:33 2018 -0500 Inserted missing integer typecasting into ukernels. Details: - Inserted missing safeguards into most microkernels to ensure that the integers read by the microkernel's assembly instructions are of the appropriate size. In many cases, this bug was going undetected likely because the compiler was inserting zero padding before the integers in the calling function, allowing the assembly code to read 64-bits in a way that did not corrupt the "lower" 32 integer bits with garbage in the higher bits. Thanks to Francisco Igual and Devangi Parikh for finding this issue. commit 12dfa9516428b4092554f0ce70b07571d35de222 Author: Field G. Van Zee Date: Wed May 16 12:46:57 2018 -0500 Fixed a bug in determining default integer size. Details: - Fixed a bug that would cause configurations to inadvertantly define their integers to be 32 bits when those environments actually call for 64-bit integers. While either BLIS_ARCH_64 or BLIS_ARCH_32 is defined in bli_system.h (based on whether preprocessor macros such as __x86_64 or __aarch64__ are defined by the environment), bli_system.h was being #included *after* bli_config_macro_defs.h, in which the BLIS_ARCH_64 macro was used to choose an integer type size in the event that BLIS_INT_TYPE_SIZE was not already defined by configure via bli_config.h. And due to the structure of the cpp code in that file, the 32-bit integer case was being chosen. Thanks to Francisco Igual and Devangi Parikh for their help in isolating this bug. - Moved the #include of hbwmalloc.h and related preprocessor code to bli_kernel_macro_defs.h to facilitate the reshuffling of the #include for bli_system.h in blis.h. commit f930cec0f35824c0f9ebbd218614209217d491cb Author: Field G. Van Zee Date: Tue May 15 17:47:08 2018 -0500 More tweaks to CONTRIBUTING.md. commit 173e30ff7d293ba31f3fab8ab0c0a695eda3d4fd Author: Field G. Van Zee Date: Tue May 15 14:48:34 2018 -0500 Added initial draft of CONTRIBUTING.md file. Details: - Thanks to the Ruby on Rails project for providing a good template off of which to build. commit 6e25e758b444bf725046674e1e64c6a52421749d Author: Nico Schlömer Date: Tue May 15 14:03:20 2018 +0200 Debian config (#206) * add debian config * correct wording in the README commit fcf6c6a3c87da08a7cdb92b102489b991ef7a644 Author: Alex Arslan Date: Mon May 14 18:41:03 2018 -0700 Fix shared library builds on platforms other than Linux and macOS (#209) * Fix detection of systems other than Linux and macOS The way the logic is currently laid out, any platform that isn't Linux gets assigned the .dylib shared library extension and the macOS-specific compiler flags. This reverses the logic to check for macOS first, and have the fallback use the Linux definitions, which apply to most other systems as well. * Use SHLIB_EXT instead of SO_SUF The former is more standard, as jakirkham pointed out in a comment. commit 6f7f51048c48f31d691c06451d0fd2cbc453ad03 Author: Field G. Van Zee Date: Mon May 14 18:41:56 2018 -0500 Echo cc_vendor when printing compiler version. Details: - Echo the ${cc_vendor} when informing the user of the compiler's version. Previously, the actual ${cc} (which could be a path to the executable) was being printed, which has already been printed by that point in the configure script. commit ad67dc4e348b0a381efc057573a6b03cc7e26db0 Author: Field G. Van Zee Date: Mon May 14 18:35:28 2018 -0500 Communicate cc, cc_vendor to make via config.mk. Details: - Historically, the compiler selection has happened statically in the various make_defs.mk and would only be overriden by setting CC (either prior to running configure or as a configure argument). However, in the last couple months, configure has evolved to contain rather sophisticated compiler detection logic for the purposes of blacklisting sub-configurations. It only makes sense that configure now fully take over the responsibility of selecting a compiler from the GNU make side of the build system. Thanks to Alex Arslan for his help exposing this issue. - Substitute found_cc into CC in config.mk via configure. - Set a new variable, CC_VENDOR, in config.mk via substitution from configure, and disable the corresponding CC_VENDOR code in common.mk. - Disabled default compiler selection (usually gcc) in the sub-configs' various make_def.mk files. commit 20af119fc97ec6120017a7a5ba5f9aaa920c7640 Author: Field G. Van Zee Date: Mon May 14 17:44:58 2018 -0500 Added README.md to 'config' directory. Details: - Added a brief README.md file to the config directory to redirect those who may be exploring the source tree to the ConfigurationHowTo wiki. (Included is a very brief explanation of configurations for those who don't have time to read the wiki.) Thanks to Nico Schlömer for this suggestion. commit 9dbce16269c3e1f27c7a0d64372cc76aed30dfc1 Author: Field G. Van Zee Date: Mon May 14 17:04:54 2018 -0500 Search for 'cc clang gcc' on OpenBSD, FreeBSD. Details: - Swapped gcc and clang in the compiler search list for OpenBSD. - Use the same search list for FreeBSD as above. commit 55ebf24d63128b5fd15b10160485667415a02a55 Author: Field G. Van Zee Date: Mon May 14 16:19:08 2018 -0500 Change compiler search order on OpenBSD. Details: - Set a compiler search list (and order) as a function of the OS detected via 'uname -s'. By default, this list and order is 'gcc clang cc' for Linux and Darwin (OS X), and any other OS except OpenBSD). On OpenBSD, we use 'cc gcc clang' because OpenBSD's default installation of gcc (4.2.1) is too old for BLIS. Thanks to Alex Arslan for reporting this issue and suggesting a fix. commit 4fb353bd90e6642c8aeffd1b1e6329f54eee4bb4 Merge: 4b36e85b 8a2857b5 Author: Field G. Van Zee Date: Sun May 13 17:50:51 2018 -0500 Merge branch 'master' into dev commit 8a2857b5e3c633b18c24f2275110437a702a71d0 Author: Field G. Van Zee Date: Fri May 11 18:42:05 2018 -0500 Fixed README.md typo; mention 'make check'. commit 543935c02f9335142d2e485a15f37dbaebe012ed Author: Field G. Van Zee Date: Fri May 11 18:35:32 2018 -0500 Updated README.md with Ubuntu packages link. Details: - Created a separate section of README.md for external packages, with one bullet each for Dave Love's rpms and Nico Schlömer's Ubuntu apt packages. Thanks to Dave and Nico for their contributions. commit af1d8470b56d3b2a1c8513d366d788dddcb84baa Author: Field G. Van Zee Date: Fri May 11 17:49:58 2018 -0500 Better handling of shared libraries on OS X. Details: - Use the .dylib shared library suffix on OS X (instead of .so in Linux). - Link with the -dynamiclib and -install_name options on OS X (instead of -shared and -soname in Linux). - Determine operating system (e.g. Linux, Darwin) during configure and substitute into config.mk.in rather than run 'uname -s' during make. - Echo operating system during configure. commit 4b72a462d7467cf815422aafac7b05037d2e3b13 Author: Field G. Van Zee Date: Thu May 10 18:35:38 2018 -0500 Enable building shared library by default. Details: - Tweaked configure so that the shared library is generated by default. - Updated --help text and configure's feedback messages reporting the status of the static/shared builds. - Changed the order of build product installation so that headers are installed last, after libraries and symlinks. commit b699bb1ff03c6e9baaa054805b4939983ae7145b Author: Field G. Van Zee Date: Thu May 10 15:54:17 2018 -0500 Adopt Linux-like .so versioning at install-time. Details: - Changed the naming conventions used for installed libraries and symlinks to more closely mirror patterns used by typical GNU/Linux libraries. Whereas previously static and shared libraries were installed and symlinked as follows: (library) libblis-0.3.2-15-haswell.a (library) libblis-0.3.2-15-haswell.so (symlink) libblis.a -> libblis-0.3.2-15-haswell.a (symlink) libblis.so -> libblis-0.3.2-15-haswell.so we now use the following naming conventions: (library) libblis.a (symlink) libblis.so -> libblis.so.0.1.2 (symlink) libblis.so.0 -> libblis.so.0.1.2 (library) libblis.so.0.1.2 where 0.1.2 indicates shared library major, minor, and build versions of 0, 1, and 2, respectively. The conventional version string can still be queried by linking to the library in question and then calling bli_info_get_version_str(). (The testsuite binary does this automatically at startup.) - Added logic to common.mk to set the soname field in the shared library via the -soname linker flag. - Added a 'so_version' file to the top-level directory containing two lines. The first line specifies the .so major version number, and the second line specifies the minor and build version numbers joined with a '.'. This file is read by configure and those values substituted into build/config.mk.in to define SO_MAJOR, SO_MINORB, and SO_MMB variables. commit fc2d9ec6bf46f6e5b19d196208415ce433e95b10 Author: Field G. Van Zee Date: Wed May 9 15:19:28 2018 -0500 Tweaks to top-level clean and distclean targets. Details: - Moved the removal of bli_config.h from cleanh to distclean. - Removed cleantest as a dependency of clean. commit bf0350305971e3991861b5117a13fda31ff97b6d Author: Field G. Van Zee Date: Tue May 8 16:49:22 2018 -0500 Renamed (shortened) a few build system variables. Details: - Renamed the following variables in config.mk (via build/config.mk.in): BLIS_ENABLE_VERBOSE_MAKE_OUTPUT -> ENABLE_VERBOSE BLIS_ENABLE_STATIC_BUILD -> MK_ENABLE_STATIC BLIS_ENABLE_SHARED_BUILD -> MK_ENABLE_SHARED BLIS_ENABLE_BLAS2BLIS -> MK_ENABLE_BLAS BLIS_ENABLE_CBLAS -> MK_ENABLE_CBLAS BLIS_ENABLE_MEMKIND -> MK_ENABLE_MEMKIND and also renamed all uses of these variables in makefiles and makefile fragments. Notice that we use the "MK_" prefix so that those variables can be easily differentiated (such as via grep) from their "BLIS_" C preprocessor macro counterparts. - Other whitespace changes to build/config.mk.in. - Renamed the following C preprocessor macros in bli_config.h (via build/bli_config.h.in): BLIS_ENABLE_BLAS2BLIS -> BLIS_ENABLE_BLAS BLIS_DISABLE_BLAS2BLIS -> BLIS_DISABLE_BLAS BLIS_BLAS2BLIS_INT_TYPE_SIZE -> BLIS_BLAS_INT_TYPE_SIZE and also renamed all relevant uses of these macros in BLIS source files. - Renamed "blas2blis" variable occurrences in configure to "blas", as was done in build/config.mk.in and build/bli_config.h.in. - Renamed the following functions in frame/base/bli_info.c: bli_info_get_enable_blas2blis() -> bli_info_get_enable_blas() bli_info_get_blas2blis_int_type_size() -> bli_info_get_blas_int_type_size() - Remove bli_config.h during 'make cleanh' target of top-level Makefile. commit 4b36e85be9b516b4089b24768f881dd976668997 Author: Field G. Van Zee Date: Tue May 8 14:26:30 2018 -0500 Converted function-like macros to static functions. Details: - Converted most C preprocessor macros in bli_param_macro_defs.h and bli_obj_macro_defs.h to static functions. - Reshuffled some functions/macros to bli_misc_macro_defs.h and also between bli_param_macro_defs.h and bli_obj_macro_defs.h. - Changed obj_t-initializing macros in bli_type_defs.h to static functions. - Removed some old references to BLIS_TWO and BLIS_MINUS_TWO from bli_constants.h. - Whitespace changes in select files (four spaces to single tab). commit 7e5648ca150757b874f6823da832f3798c40b9f9 Author: Field G. Van Zee Date: Mon May 7 18:59:19 2018 -0500 Add configure support for --libdir, --includedir. Details: - Added support for two new configure options: --libdir and --includedir. They specify the precise install directories for libraries and header files, respectively, and override any location implied by the --prefix option (including the default install prefix, if --prefix was not given). Thanks to Nico Schlömer for suggesting this via issue #195. - Removed the INSTALL_PREFIX definition/anchor from build/config.mk.in and replaced it with corresponding definitions/anchors for libdir and includedir. - Updated top-level Makefile to use the new variables, INSTALL_LIBDIR and INSTALL_INCDIR, instead of INSTALL_PREFIX (which is now no longer needed by make). - Set default sane values for INSTALL_LIBDIR and INSTALL_INCDIR in common.mk when configure has not been run, as is already done for DIST_PATH. This is to safeguard against statements in the top-level Makefile that use 'find' to locate old libraries and headers for the uninstall targets, which run regardless of make target. Without setting INSTALL_LIBDIR and INSTALL_INCDIR, those variables are empty and the 'find' ends up looking at '/', which is obviously not what we want. (Also enclosed those definitions in an IS_CONFIGURED guard so that they won't get evaluated unless configure has been run.) - Rearranged "ifeq ($(IS_CONFIGURED),yes)" conditionals in Makefile to reduce occurrences and separated "local" and top-level components of cleanblastest and cleanblistest targets to improve readability. - Adjusted out-of-tree builds so that they are no longer oblivious to the .git directories, if present, and thus now properly augment version strings with the appropriate patch number. - Include missing version string in 'configure --help' output. commit b09e4e8852a6c42895910e3bcb9041124dc8bf9f Author: Field G. Van Zee Date: Mon May 7 14:37:50 2018 -0500 Allow 'make clean' and friends without configuring. Details: - Modified top-level Makefile so that a user can run 'make distclean', 'make clean', or any of the other clean-related targets prior to running configure (or after a previous 'make distclean'). Thanks to Nico Schlömer for suggesting this via issue #197. - Made the cleanblastest and cleanblistest more comprehensive in that they now clean out build products that would have resulted from local compilation (ie: builds performed within the 'blastest' or 'testsuite' directories). - Added "cc" to list of expected compiler "vendors" since the CC variable seems to automatically be set to "cc" on Ubuntu 16.04 (which is just an alias to gcc). - Comment update to build/config.mk.in. commit 35c5a1449c3efe0b2ec43cdefcfdf00e71828149 Author: Field G. Van Zee Date: Mon May 7 12:04:57 2018 -0500 No longer update version file during configure. Details: - Recycled the core functionality of build/update-version-file.sh into a function in configure, disabling the updating of the 'version' file in the process. Instead of writing the patched version string back to the version file and then reading it again from within configure, the patched version string is now saved directly to a variable in the main() function in configure. This will prevent developers from accidentally committing configure-induced changes to the version file in between releases. commit 8adb2f919b62da4a2885ae04a10925e0e6a2e304 Author: Mathieu Poumeyrol Date: Sun May 6 19:58:16 2018 +0200 Some cross compilations fixes (#198) * cross-compilation fixes * add doc ranlib variable * icc support -dumpversion, posix compatible test, plus one stupid mistake * retab * revert version as requested commit 89acd9ebe516eeb97006dba344354bfc98826645 Merge: 4cff432d 0557eba7 Author: Field G. Van Zee Date: Wed May 2 12:53:35 2018 -0500 Merge branch 'amd' commit 4cff432d707891ada705b039a7e043558bbf3c51 Author: Nisanth M P <31736542+nisanthmpamd@users.noreply.github.com> Date: Wed May 2 23:20:42 2018 +0530 AMD specific optimizations for target 'zen' (#194) Re-enabled AMD-specific optimizations for zen. Details: - Re-enabled Zen-specific cache blocksizes for 'zen' sub-configuration. - Re-enabled small matrix gemm optimization for 'zen'. - These were both temporarily disabled during a previous merge simply due to lack of Zen hardware for testing. commit 8eda5fe7f678b413cb274bd84716995a7d0b87a9 Author: Field G. Van Zee Date: Wed May 2 12:20:37 2018 -0500 Typo fix in README.md. commit 0557eba78f5fcf28f0f039f28da79498ffde848c Author: Nisanth M P Date: Mon Mar 19 12:49:26 2018 +0530 Re-enabling the small matrix gemm optimization for target zen Change-Id: I13872784586984634d728cd99a00f71c3f904395 commit df78ceb3d6f33a27fe69017854405edaea7c40e5 Author: Nisanth M P Date: Mon Mar 19 11:34:32 2018 +0530 Re-enabling Zen optimized cache block sizes for config target zen Change-Id: I8191421b876755b31590323c66156d4a814575f1 commit 5e515f9a76f4aaf43dc21315a34d797726ca8069 Author: Field G. Van Zee Date: Tue May 1 13:44:10 2018 -0500 Tweaked new language in README.md. commit 1ddd9e316ad5024af8b606dfcebd1e7d587a130f Author: Field G. Van Zee Date: Tue May 1 13:36:28 2018 -0500 Added link to Dave Love's Fedora Copr page. Details: - Added a blurb to README.md advertising Dave Love's Copr homepage, which contains rpm packages for RHEL/Fedora-like distributions. commit 078a852f738c66c6468bd5e64b06467edc9057fd Author: Field G. Van Zee Date: Mon Apr 30 16:15:26 2018 -0500 Minor tweaks to top-level 'make clean' target. Details: - Execute 'cleanh' target as part of 'clean' - Remove cblas.h file from 'include//' as part of 'cleanh' target. - Updated the echoed (non-verbose) text for uniformity. commit 75d0d1057dda69c655bd1cd8f791cb39b54d99b8 Author: Field G. Van Zee Date: Mon Apr 30 14:57:33 2018 -0500 Renamed various datatype-related macros/functions. Details: - Renamed the following macros in bli_obj_macro_defs.h and bli_param_macro_defs.h: - bli_obj_datatype() -> bli_obj_dt() - bli_obj_target_datatype() -> bli_obj_target_dt() - bli_obj_execution_datatype() -> bli_obj_exec_dt() - bli_obj_set_datatype() -> bli_obj_set_dt() - bli_obj_set_target_datatype() -> bli_obj_set_target_dt() - bli_obj_set_execution_datatype() -> bli_obj_set_exec_dt() - bli_obj_datatype_proj_to_real() -> bli_obj_dt_proj_to_real() - bli_obj_datatype_proj_to_complex() -> bli_obj_dt_proj_to_complex() - bli_datatype_proj_to_real() -> bli_dt_proj_to_real() - bli_datatype_proj_to_complex() -> bli_dt_proj_to_complex() - Renamed the following functions in bli_obj.c: - bli_datatype_size() -> bli_dt_size() - bli_datatype_string() -> bli_dt_string() - bli_datatype_union() -> bli_dt_union() - Removed a pair of old level-1f penryn intrinsics kernels that were no longer in use. commit 01c4173238baf08e7f6700a3f91a2ea58cca50c1 Author: Field G. Van Zee Date: Sat Apr 28 14:07:34 2018 -0500 CHANGELOG update (0.3.2) commit 2fb440876690bdcec0c11a30e2b33ad100bab529 Author: Field G. Van Zee Date: Sat Apr 28 14:07:31 2018 -0500 Version file update (0.3.2) commit cdf041ddadd8725e578e2f59f37ae341f26655af Author: Field G. Van Zee Date: Sat Apr 28 14:05:00 2018 -0500 Use config.mk instead of common.mk in bump-version.sh. Details: - Fixed inadvertent targeting of common.mk when testing whether configure had already been run, rather than config.mk. commit 6ded8f9f0364b3c07255e2532ada3eeb2ed2a715 Author: Field G. Van Zee Date: Sat Apr 28 14:01:29 2018 -0500 Account for recent 'make distclean' in bump-version.sh. Details: - Added logic to build/bump-version.sh that will run './configure auto' if 'common.mk' is not present (usually because 'make distclean' was run recently). commit 7c16fdce433f5dea0e83d5047553c955d8e46fd2 Author: Field G. Van Zee Date: Sat Apr 28 13:50:55 2018 -0500 Fixed typo in RELEASING file. commit 5e5ca4984fcf6d72d3036c338bb9cdc64520a325 Author: Field G. Van Zee Date: Sat Apr 28 13:48:01 2018 -0500 README updates. Details: - Updates to the top-level README files in the top-level directory as well as the 'examples/oapi' directory. commit 627b045e301defea6770dc5b64e1110cbec25153 Author: Field G. Van Zee Date: Fri Apr 27 18:11:19 2018 -0500 Added an example of using transposition with gemm. Details: - Added an example to examples/oapi/8level3.c to show how to indicate transposition when performing a gemm operation. commit 13a0eadc69d72933e322901f5b44944834e3c787 Author: Field G. Van Zee Date: Fri Apr 27 18:00:07 2018 -0500 Added more transposition/conjugation examples. Details: - Added code to examples/oapi/5level1m.c that demonstrates transposing (and conjugate-transposing) unstructured matrices. - Comment updates to 6level1m_diag.c to maintain consistency with new examples in 5level1m.c. commit 5606cd8881e75264a96af45dc8ea1905bab054f5 Author: Field G. Van Zee Date: Fri Apr 27 17:13:10 2018 -0500 Added utility module to examples/oapi. Details: - Added a new code example file to examples/oapi demonstrating how to use various utility operations. - Comment updates to other example files. - README updates. commit ff26c94c6486374c709f93c6965ea18903bd6a18 Author: Field G. Van Zee Date: Fri Apr 27 12:31:34 2018 -0500 Added missing gcc version constraint for knl. Details: - Previously forgot to add explicit enforcement of a minimum gcc version in configure script when 'knl' sub-configuration is requested. - Comment updates to configure. commit 4d97574e477b3e55ddbb6044b0542a92cd9bab30 Author: Field G. Van Zee Date: Tue Apr 24 18:48:09 2018 -0500 Added object API example code. Details: - Added an 'examples' directory at the top level. - Added an 'oapi' subdirectory in 'examples' that contains a tutorial-like sequence of example code demostrating the core functionality of BLIS's object-based API, along with a Makefile and README. Thanks to Victor Eijkhout for being the first to suggest including such code in BLIS. commit d6ab25a3232aa52b9b855088fb4b0b46ff2c00c8 Author: Field G. Van Zee Date: Tue Apr 24 18:43:03 2018 -0500 Add setijm, getijm operations. Details: - Added bli_setgetijm.c, which defines bli_setijm(), bli_getijm(), and related functions that can be used to read and write individual elements of an obj_t. - Defined a new function, bli_obj_create_conf_to(), in bli_obj.c that will create a new object with dimensions conformal to an existing object. Transposition and conjugation states on the existing object are ignored, as are structure and uplo fields. - Defined a new function, bli_datatype_string(), in bli_obj.c that returns a char* to a string representation of the name of each num_t datatype. For example, BLIS_DOUBLE is "double" and BLIS_DCOMPLEX is "dcomplex". BLIS_INT is included (as "int"), but BLIS_CONSTANT is not, and thus is not a valid input argument to bli_datatype_string(). - Added calls to bli_init_once() to various functions in bli_obj.c, the most important of which was bli_obj_create_without_buffer(). - Removed unintended/extra newline from the end of printv output. - Whitespace changes to - frame/base/bli_machval.c - frame/base/bli_machval.h - frame/0/copysc/bli_copysc.c - Trivial changes to README.md and common.mk. commit a731a428f7fc02fd6ab4f953ead828c1d06fb5a1 Author: Field G. Van Zee Date: Tue Apr 17 16:44:55 2018 -0500 Another README.md update. commit c734ee928a824b27d280a9a67b1b4bc8423d5795 Author: Field G. Van Zee Date: Tue Apr 17 16:40:05 2018 -0500 README.md update. commit 03ecad372d8eb603ee905a7b944d0544a813460a Author: Field G. Van Zee Date: Tue Apr 17 14:16:59 2018 -0500 Added RELEASING file. Details: - Added a file named 'RELEASING' that contains basic notes on how to create a new version/release of BLIS. This is mostly just a reminder to myself, but also may become useful if/when others take over development and administration of the project. commit 24b3c3149ce66546b9a1afc2cc794a637a86aa60 Merge: 60366a3f 817b67c0 Author: Field G. Van Zee Date: Mon Apr 16 18:49:38 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 60366a3faba4e60cee85c3b87a3f69625f4b9026 Author: Field G. Van Zee Date: Mon Apr 16 18:46:21 2018 -0500 Updates to knl kernels and related code. Details: - Imported the 24x16 knl sgemm microkernel (and its corresonding spackm kernel) from TBLIS and enabled its use in the knl sub-config. Also Added sgemm microkernel prototype to bli_kernels_knl.h. - Updated dgemm and dpackm microkernels from TBLIS, which included an important change regarding the offsets array (changed from extern declaration to static declaration/definition). - Activated use of level-1v and -1f zen kernels in skx and knl sub-configs. - Removed some old macros no longer needed in bli_family_skx.h now that libmemkind support exists in configure. - Moved bli_avx512_macros.h to frame/include and adjusted #includes in skx and knl kernels accordingly. - Moved unused kernels in kernels/knl/3 to kernels/knl/3/other directory. - Fixed a minor bug in the 'make' output per compile when verboseness is not turned on. The rule-generating function 'make-kernel-rule' was previously passing in the name of the config, rather than the name of the kernel set returned by get-config-for-kset, which could give misleading information to the user when the kconfig_map mapped a kernel set to a sub-configuration that did not share the same name. (This didn't affect the CFLAGS that were actually used.) - Updated test/3m4m/Makefile, removing acml targets and renaming the remaining targets. commit 817b67c01752e0ca8fe230bb8ad23afc7bd0f64e Merge: 67c9c2f8 2b7108a8 Author: Field G. Van Zee Date: Mon Apr 16 14:06:26 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 67c9c2f86d5ef2accc439b21581d73d82754a2e3 Author: Field G. Van Zee Date: Mon Apr 16 14:03:12 2018 -0500 Retired haswell gemm microkernels. Details: - Moved microkernels in kernels/haswell/3 to kernels/haswell/3/old. These microkernels were no longer being used and only sowed confusion to anyone inspecting the repository without being fully cognizant of the build system and how it works (and sometimes even to those who wrote the build system). Note that the haswell configuration currently employs the zen microkernels. commit 2b7108a8ef8ce958b3acad028ff07c85ff97fd63 Author: Field G. Van Zee Date: Mon Apr 16 12:35:53 2018 -0500 Minor updates to test driver makefiles. Details: - Cleaned up and homogenized the various test driver Makefiles in testsuite and test directories. - Very minor updates to test driver code. commit 9f56df95570a24587b910b169f342bd356ccbfb6 Author: Field G. Van Zee Date: Wed Apr 11 14:51:36 2018 -0500 Trivial tweaks to configure blacklisting output. Details: - Updated output of information vis-a-vis configuration blacklisting. commit f56481efebd9a7785c0618f3a12c0bec36f46333 Author: Field G. Van Zee Date: Tue Apr 10 19:02:21 2018 -0500 Cleaned up assembler version query on OS X. Details: - Swiched from querying version of 'objdump' to 'as' (e.g. the assembler). - Fixed the outputting of the version of 'as' on OS X, which required this beauty: ...=$(as -v /dev/null -o /dev/null 2>&1) - Only add sub-configs to blacklist if the sub-config hasn't already been added. commit 088c474e629535affbe111f141f895af50d109be Author: Field G. Van Zee Date: Tue Apr 10 18:09:56 2018 -0500 Added support for blacklisting via the assembler. Details: - Added logic to configure that attempts to assemble various small files containing select instructions designed to reveal whether binutils (specifically, the assembler) supports emitting those instruction sets. This information provides additional opportunities to blacklist sub- configurations that are unsupported by the environment. Thanks to Devin Matthews for pointing me towards a similar solution in TBLIS as an example. - Various other cleanups in configure. - Reorganized the detection code in the 'build' directory, bringing the "auto-detect" configuration detection, libmemkind detection, and new instruction set detection codes into a single new subdirectory named 'detect'. commit 78a24e7dada52a3582f8488795bd1a44993989d9 Author: Field G. Van Zee Date: Mon Apr 9 17:02:13 2018 -0500 Updated bli_avx512_macros.h in knl and skx configs. Details: - Downloaded updated version of bli_avx512_macros.h from TBLIS [1] in attempt to address issue #192. [1] https://github.com/devinamatthews/tblis/ commit 388f64d6ade14caa4a6c286845ad2d565378b2bb Author: Field G. Van Zee Date: Mon Apr 9 15:33:10 2018 -0500 Fixed failure to honor CC= argument to configure. Details: - Fixed a failure to observe the value of CC when selecting the compiler in configure. Thanks to Devangi Parikh for reporting this bug. - The semantics now also work for the CC environment variable. That is, if CC is set prior to running configure, that value is used, but will be overridden by specifying the CC= argument to configure. If the CC environment variable is not set, the CC= value is used. If neither the environment variable nor CC= are specified, then the choice is made internally to configure: first attempting to find gcc, then clang, and then cc. commit 45fbe66b3e2ab92f0b4fdf437d57c5d06603803d Author: Field G. Van Zee Date: Mon Apr 9 14:01:08 2018 -0500 Fixed libmemkind dependency for x86_64. Details: - Removed some old conditional code in config/knl/make_defs.mk that added -lmemkind to LDFLAGS if DEBUG_TYPE was not 'sde' and inserted code into common.mk that affirmatively filters out -lmemkind from LDFLAGS if DEBUG_TYPE is 'sde'. (Thanks to Dave Love for reporting this issue.) Other minor cleanups to neighboring code in common.mk. - Updated CRVECFLAGS in knl/make_defs.mk to be based on -march=knl, and then AVX-512 functionality is manually removed via various -mno-avx512* flags. Also, make the setting of CRVECFLAGS conditional on CC_VENDOR. Similar change to skx/make_defs.mk. - Comment/whitespace updates. commit ca982148b3b419db063cad2fa74376ec383a5c80 Author: dnp Date: Sun Apr 8 21:27:10 2018 -0500 Fixed bug in SKX sgemm microkernel. Modified SKX dgemm mircokernel to be consistent with the sgemm microkernel commit bd0276752ccdd56ff897b1a5ae022f2ffe6e0b38 Author: Field G. Van Zee Date: Fri Apr 6 18:51:43 2018 -0500 Track separate ref kernel flags for each sub-config. Details: - Renamed CVECFLAGS variables in sub-configurations' make_defs.mk files to CKVECFLAGS. - Added default defintions of two new make variables to most sub- configurations' make_defs.mk files--CROPTFLAGS and CRVECFLAGS-- which correspond to reference kernel analogues of the CKOPTFLAGS and CKVECFLAGS, which track optimization and vectorization flags for optimized kernels. Currently, two sub-configurations (knl and skx) explicitly set CRVECFLAGS to non-default values (using AVX2 instead of AVX-512 for reference kernels. Thanks to Jeff Hammond, whose feedback prompted me to make this change (issue #187). - Changed common.mk so that the get-refkern-cflags-for function returns the flags associated with the given sub-configuration's CROPTFLAGS and CRVECFLAGS (instead of CKOPTFLAGS and CKVECFLAGS). commit b9aebce19480448817373e2df2b36bd090eae41a Author: Field G. Van Zee Date: Fri Apr 6 18:37:33 2018 -0500 De-verbosify makefile fragment generation. Details: - Changed from -v1 to -v0 when calling gen-make-frag.sh from configure. The directory-by-directory recursive output didn't add much value to the user, so now we just echo a line for each top-level directory into which we will recurse (e.g. 'config', 'ref_kernels', 'frame', etc.). This also helps keep more interesting information (from earlier in the execution of configure) from scrolling out of the terminal window. commit b549b91f26948991e13364f1f26a878da0f43aa0 Author: Field G. Van Zee Date: Fri Apr 6 16:31:33 2018 -0500 Added 64-bit integer support to BLAS test drivers. Details: - Updated the build system and BLAS test drivers to use 64-bit integers when BLIS is configured for 64-bit integers in the BLAS layer. Also updated blastest/Makefile accordingly. Thanks to Dave Love for reporting the need for this feature. - Added a 'check' target to blastest/Makefile so that the user can see a summary of the tests. - Commented out the initial definition of INCLUDE_PATHS in common.mk, which was used pre-monolithic header, back when BLIS needed paths to *all* headers, rather than just a select few. This line is no longer needed since the value of INCLUDE_PATHS is overwritten by a later definition limited to only the header paths that are needed now. commit d39fa1c04265869bdf8b6f453076359eec2f3c59 Author: Field G. Van Zee Date: Thu Apr 5 19:38:35 2018 -0500 Adjusted CFLAGS used to compile bli_cntx_ref.c. Details: - Removed CKOPTFLAGS and CVECFLAGS from the set of CFLAGS used to compile bli_cntx_ref.c for each configuration. This is necessary because the file defines functions like bli_cntx_init_skx_ref(), which are called during BLIS's initialization of the global kernel structure, potentially being executed by an architecture that lacks the instruction set used to compile the kernels for, in this example, skx, which would lead to an illegal instruction error. Thanks to Dave Love for reporting this issue. - Further adjusted CFLAGS used when compiling code in the 'config' directory (e.g. bli_cntx_init_skx.c) as well as code in 'frame' so as to avoid the aforementioned issue. commit 08b123084d35680beab379012f8f5a5a8b44a443 Author: Field G. Van Zee Date: Thu Apr 5 14:25:39 2018 -0500 Added color-coding to 'make check' output. Details: - Added color coding to output of check-blistest.sh, check-blastest.sh scripts. Success messages are coded green and failure are coded red. This helps draw the eye toward those messages as the 'make checkblis', 'make checkblis-fast', and 'make checkblas' targets are executed. - Changed top-level Makefile so that execution will not halt if 'checkblis', 'checkblis-fast', or 'checkblas' targets fail, which means that the second of the two tests (BLIS and BLAS) run by 'make check' will run even if the first test fails. commit c9e4d7db7410b03c1ffe8c9727e9f1b2ba7fecfe Author: Field G. Van Zee Date: Wed Apr 4 17:13:15 2018 -0500 CHANGELOG update (0.3.1) commit 1f28d7c86e17730f05bd239c8e8d67e3e7510a4f Author: Field G. Van Zee Date: Wed Apr 4 17:13:15 2018 -0500 Version file update (0.3.1) commit e6cc9ee26bcf0450f1120d5d12985b04d9fb8516 Merge: 786d15c5 3c91c7ae Author: Field G. Van Zee Date: Wed Apr 4 16:08:18 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 786d15c5ef09f1f647b126b63d57e76d5810c58e Author: Field G. Van Zee Date: Wed Apr 4 16:06:47 2018 -0500 Added skx, knl to x86_64 configuration family. Details: - Added 'skx' and 'knl' sub-configurations to the 'x86_64' configuration family in the config_registry file. - Added logic to configure that avoids committing certain sub-configs to the configuration/kernel registries if those sub-configs cannot be handled properly by the chosen compiler. (This was modeled after similar logic in TBLIS's configure; thanks to Devin Matthews for pointing this out.) First, the compiler and its version are inspected and, based on the results, certain configurations are added to a "blacklist". Then, as the configuration registries are being created, configurations and/or kernels that match items in the blacklist are skipped over and not commited to the registries. Under certain circumstances, omitting a blacklisted configuration will indirectly invalidate other configurations due to the loss of availability of the original blacklisted configuration's kernel set. This additional indirect blacklist is also accounted for. - Added output to the beginning of configure that echos information about the chosen compiler as well as the configurations that are blacklisted and must be stripped from the registries. - Various other cleanups in configure, especially with respect to explicitly declaring local variables in functions. - Comment updates to config/zen/make_defs.mk regarding choice of -march flags based on compiler version. commit 3c91c7aebafb446a2582267beb3b22c8bb475b3b Author: Field G. Van Zee Date: Mon Apr 2 12:40:25 2018 -0500 Fixed 64b type mismatch warning in cblas_xerbla.c. Details: - Fixed a compiler warning concerning a type mismatch between the format specifier of the printf() call in cblas_xerbla.c and its corresponding (info) argument. The warning manifested when the CBLAS layer was enabled and the BLAS/CBLAS integer type siwas is set to 64 (the default is 32). The warning was fixed by changing the specifier from %d to %jd and typecasting the argument to intmax_t. Thanks to Dave Love for reporting this issue and submitting the patch. commit 71eaf449a812fe2bd640d21513ec83974b2edb45 Merge: 6a628184 ae9a5be5 Author: Field G. Van Zee Date: Tue Mar 27 17:21:43 2018 -0500 Merge branch 'dev' commit ae9a5be56d6f9b87278d6032154d2dcf3fb7d54f Author: dnp Date: Tue Mar 27 17:01:23 2018 -0500 Fixed bug in skx sgemm microkernel commit 3f02af0905b1e2e2e065862f8afe5e9a52f282b2 Author: Field G. Van Zee Date: Mon Mar 26 17:40:04 2018 -0500 Row storage optimizations to zen dotxf kernels. Details: - Split the main loop bodies of zen's [sd]dotxf kernels into two cases: one to handle a column-stored matrix A and one to handle a row-stored matrix A. This allows vector instructions to be employed even if A is stored by rows (and A^T appears stored as columns). Both storage cases use a common edge case loop. Thanks to Devin Matthews for this idea and for prototyping the change needed for sdotxf kernel. commit 679dcc331dd870ec680e135a3fb65ffa6e3a91c2 Author: Field G. Van Zee Date: Mon Mar 26 15:35:17 2018 -0500 Make k_iter/k_left uint64_t in bulldozer fma ukrs. Details: - Changed the declaration of k_iter and k_left for d, c, z microkernels from dim_t to uint64_t. This is needed to ensure compatibility with the movq instruction used to load the value into registers. This change should have been made a long time ago, but for some reason only recently began showing up via Travis CI. commit 6a628184f6938673440e4cdd4fed0208c51fd1f9 Author: Field G. Van Zee Date: Mon Mar 26 14:48:16 2018 -0500 Fixed a memkind-related compile-time bug on knl. Details: - Fixed a compile-time error that occurred due to the fact that BLIS_ENABLE_MEMKIND, defined in bli_config.h, was not being defined soon enough to be used in bli_system.h where it is needed to determine whether hbwmalloc.h should be #included. bli_system.h is now included after bli_config.h (and bli_config_macro_defs.h). Thanks to Dave Love for reporting this issue. - Tweaked the language used by configure to echo the status of the --with[out]-memkind option. commit e2192a8fd58ec3657434ddd407033e097edad8f4 Author: Field G. Van Zee Date: Fri Mar 23 12:53:48 2018 -0500 Removed vzeroupper intrinsics from zen kenels. Details: - Fixed a bug in the zen (also used by haswell) dotxf kernels whereby a vzeroupper instruction destoryed part of the intermediate result stored by the vdpps instructions that came right before. (The vzeroupper instrinsic was removed.) - Removed remaining vzeroupper instrinsics from other zen kernels. Previously, the vzeroupper instructions were included because BLIS is typically compiled with -mfpmath=sse. But it was brought to my attention that inserting these vzeroupper instructions is unnecessary for our purposes, since (a) -mfpmath=sse results in VEX-encoded scalar code rather than literal SSE instructions, and (b) compilers already (likely) insert vzeroupper instructions where necessary. Thanks to Devin Matthews for zeroing in on the dotxf bug. - Removed -malign-double from bulldozer make_defs.mk. This alignment was already happening by default since bulldozer is an x86_64 system. commit 22289ad23cd10b81451ce82f60d84b5f97e7fd85 Author: Field G. Van Zee Date: Thu Mar 22 18:21:30 2018 -0500 Added build system support for libmemkind. Details: - Added support for libmemkind to configure. configure attempts to detect the presence of libmemkind by compiling a small program containing #include and a call to hbw_malloc(). If successful, it is assumed that libmemkind is present and available. If present, use of libmemkind is enabled by default, and otherwise use is disabled by default. If libmemkind is present, the user may explicitly disable use of the library by running configure with the --without-memkind option. Furthermore, a configuration may disable libmemkind, perhaps conditional on some aspect of the build system, by including -DBLIS_DISABLE_MEMKIND in the configuration's CPPROCFLAGS make variable and setting the BLIS_ENABLE_MEMKIND makefile variable, set in config.mk, to 'no'. (The knl configuration makes use of this latter feature; see below.) - If enabled at configure-time, bli_system.h will #include and bli_kernel_macro_defs.h will define BLIS_MALLOC_POOL and BLIS_FREE_POOL to use hbw_malloc() and hbw_free(), respectively. - Deprecated explicit use of BLIS_NO_HBWMALLOC in config/knl/bli_family.knl.h and replaced use of -DBLIS_NO_HBWMALLOC in config/knl/make_defs.mk with -DBLIS_DISABLE_MEMKIND, which overrides (#undefs) the definition of BLIS_ENABLE_MEMKIND in bli_system.h, if it would otherwise be defined. Also, set the BLIS_ENABLE_MEMKIND makefile variable to 'no'. - common.mk now adds libmemkind to LDFLAGS if libmemkind is enabled. commit 7dc40eafdd9af3e8c4519a8d1b04d25830b4ca7a Author: Field G. Van Zee Date: Wed Mar 21 18:39:16 2018 -0500 Updates to top-level and test driver Makefiles. Details: - Added logic to common.mk that will choose a BLIS library against which to link (LIBBLIS_LINK). The default choice is the static (.a) library; the shared (.so) library is chosen only if the shared library build was enabled and the static one was disabled. - Updated the various test driver Makefiles to reference this common, pre-chosen library against which to link. (Previously, these drivers unconditionally linked against the static library and would have failed if the static library build was disabled at configure-time.) - Renamed many of the variables in common.mk and the top-level Makefile so that variables relating to the libblis.[a|so] files, including paths to those files, begin with "LIBBLIS". - Shuffled around some of the library definitions from the top-level Makefile to common.mk. - Renamed BLIS_ENABLE_DYNAMIC_BUILD to BLIS_ENABLE_SHARED_BUILD, and the @enable_dynamic@ anchor to @enable_shared@ in build/config.mk.in and in configure. - A few other cleanups in the top-level Makefile. commit 97e1eeade3c51df1bae574a9bc1da34b05bf2bd3 Author: Field G. Van Zee Date: Wed Mar 21 15:47:11 2018 -0500 Added input.operations.fast file for 'make check'. Details: - Added an 'input.operations.fast' file to testsuite directory to go along with the 'input.general.fast' file used by the 'make check' target in the top-level Makefile. This will allow the "fast" check to prune operations and/or parameter combinations from the test space in order to save time. - Currently, input.operations.fast prunes trmm3 and all transposition and conjugation parameters from the level-3 test space. - Reduced problem size tested in input.general.fast to 100 and disabled testing of 1m method. commit c441caa95aabe69f54e2160eb67bf4ca76a66c34 Author: Field G. Van Zee Date: Tue Mar 20 17:56:02 2018 -0500 README update. Details: - Minor updates to README.md. - Minor change to blastest/Makefile. commit 6fe018eb4ac8c16f2edc916c24f5994848017b7f Author: Field G. Van Zee Date: Tue Mar 20 15:35:45 2018 -0500 Added .gitkeep file to blastest/obj. Details: - Added an empty file named '.gitkeep' to blastest/obj/ so that git will track the otherwise empty directory. (This is already done for the BLIS testsuite in testsuite/obj.) commit 0e6d000db9291342913dc5f8590a28c67bbcbc95 Author: Field G. Van Zee Date: Tue Mar 20 15:08:43 2018 -0500 Updated .gitignore to ignore BLAS test out.* files. commit 40c040a31d96fbadff11f761d0cad1ef03ef2cc5 Author: Field G. Van Zee Date: Tue Mar 20 14:33:50 2018 -0500 Fixes to .travis.yml. Details: - Invoke the full BLIS testsuite via 'make testblis' instead of the fast version via 'blistest-fast' (which was wrong anyway, since the correct fast traget is 'testblis-fast'). - Invoke the BLAS tests via 'make testblas' instead of 'blastest'. commit 664ec4813d8b53121cce7a68bef47da656ece9cb Author: Field G. Van Zee Date: Tue Mar 20 13:54:58 2018 -0500 Integrated f2c'ed netlib BLAS test suite. Details: - Created a new test suite that exercises only the BLAS compatibility found in BLIS. The test suite is a straightforward port of code obtained from netlib LAPACK, run through f2c and linked to a stripped- down version of libf2c that is compiled along with the test drivers (to prevent any obvious ABI issues). The new BLAS test suite can be run from within its new local directory, 'blastest' (through its local 'make ; make run' targets) or from the top-level Makefile (via the 'make testblas' target). Output files are created in whatever directory the test drivers are run, whether it be the 'blastest' directory, the top-level source distribution directory, or the out-of-tree directory in which 'configure' was run. Also, the results of the BLAS test suite can be checked via 'make checkblas', which summarizes the presence or absence of test failures in a single line printed to stdout. - Updated the 'test' target to run both 'testblis' and 'testblas'. - Added a new 'testblis-fast' target that runs the BLIS testsuite with smaller problem sizes, allowing it to finish more quickly. - Added a 'make check' target, which runs 'checkblis-fast' and 'checkblas'. - Changed .travis.yml so that Travis CI runs 'testblis-fast' instead of 'testblis' before (calling the check-blistest.sh script to check the result manually). - Renamed some targets in the top-level Makefile to be consistent between BLAS and BLIS. commit fc53ad6c5b2e39238b1bbbf625cc0c638b9da4e1 Author: Nisanth M P Date: Mon Mar 19 12:49:26 2018 +0530 Re-enabling the small matrix gemm optimization for target zen Change-Id: I13872784586984634d728cd99a00f71c3f904395 commit d12d34e167d7dc32732c0ed135f8065a55088106 Author: Nisanth M P Date: Mon Mar 19 11:34:32 2018 +0530 Re-enabling Zen optimized cache block sizes for config target zen Change-Id: I8191421b876755b31590323c66156d4a814575f1 commit 40fa10396c0a3f9601cf49f6b6cd9922185c932e Author: Field G. Van Zee Date: Mon Mar 19 18:19:43 2018 -0500 Fixed a few obscure bugs in the BLAS API. Details: - Fixed a missing parameter in the definition of sdsdot_(). The 'sb' argument was missing. Strangely, the argument is omitted from dsdot_() in the BLAS API. - Fixed the missing 'c' or 'u' in the "?gerc" or "?geru" operation string passed to xerbla_() by the bla_ger_check() macro. - For bla_syrk_check() and bla_syr2k_check() macros, only allow conjugate-transpose (trans='c') as a valid argument for the real domain functions [sd]syrk_() and [sd]syr2k_(). (Previously, the argument was allowed even for the complex domain equivalents, which was inconsistent with the BLAS API.) commit fe7d7f1e43e4c26249eed83d4188beee1ba96202 Author: Field G. Van Zee Date: Sun Mar 18 19:43:06 2018 -0500 Fixed cpp macro parameter "ch" typo in bla_ger.c. Details: - Previously, the BLAS routine-generating macro in bla_ger.c was incorrectly passing MKSTR(ch) into the _check() macro when it should have been passing in the char that was available, chxy. I've instead changed the name of the macro parameter from chxy to ch. Similar change as made to bla_ger.h for consistency. Thanks to Dave Love in helping track this down. (NOTE: This is actually the root cause of the bug that was first patched by increasing the length of the operation name strings passed into xerbla_(), as defined by the constant BLIS_MAX_BLAS_FUNC_STR_LENGTH, in 3d1a5a7. In theory, that change could be backed out now.) - Applied aforementioned chxy->ch change to bla_dot.[ch], as well as frame/compat/cblas/f77_sub/f77_dot_sub.[ch] (not because it needed to happen, but for naming consistency). - Reformatted function signatures/prototypes of CBLAS functions and function calls to BLAS in frame/compat/cblas/f77_sub/*.c. commit cb7ed90752d1ddbac11368c4510641ca4f3a02eb Author: Field G. Van Zee Date: Fri Mar 16 13:05:56 2018 -0500 Convert op names to uppercase before calling xerbla_(). Details: - Defined a new function, bli_string_mkupper(), that calls toupper() on every non-NULL character in a string. - Call bli_string_mkupper() prior to calling xerbla_() in the level-2/-3 BLAS _check() macros. This prevents the BLAS testsuite from complaining that the operation name (e.g. "dgemm") does not match the expected value (e.g. "DGEMM"). Thanks to Dave Love for reporting this issue. commit 3d1a5a7c08fed3ba29f060fe1db2b0dc42dde223 Author: Field G. Van Zee Date: Fri Mar 16 12:24:07 2018 -0500 Fixed printf() format overflow. Details: - Increased the length of operation name strings passed to xerbla_() in the level-2 and level-3 operation _check() functions, found in frame/compat/check. This avoids a format specifier overflow warning by gcc 7. Thanks to Dave Love for reporting this issue and suggesting the fix. commit c73055f028684d998e03b2392093c393782bbfe7 Author: Field G. Van Zee Date: Thu Mar 15 16:08:21 2018 -0500 Return after non-zero info in BLAS checks. Details: - Previously, when calling the BLAS compatibility layer, discovering a parameter check failure would result in the proper setting of the info parameter (printed by xerbla_()), but would also come with an immediate abort() rather than a return. This was incorrect behavior for two overlapping reasons. (1) BLAS should return gracefully to the caller in the event of a bad set of parameters, not abort(). (2) When BLIS was being tested via the BLAS testsuite, BLIS's xerbla_() would correctly get preempted/overridden by the xerbla_() in the BLAS testsuite, but execution would then erroneously continue on to the BLIS implementation with bad parameter values. - The previous issue was addressed by disabling the abort() in BLIS's xerbla_(), changing all of the BLAS _check() functions to cpp macros, and adding a return statement to the end of each _check() macro's "if ( info != 0 )" conditional. Thanks to Dave Love for reporting this issue. commit c4f1d18b97a6a8c3ea0366aa759db597a664062a Author: Field G. Van Zee Date: Wed Mar 14 19:10:09 2018 -0500 Minor typo fix to printing arch in testsuite. Details: - Mistakenly was calling bli_cpuid_query_id() instead of bli_arch_query_id() in the recent addition to the testsuite output that prints the active sub-configuration. The former function is only used for multi-architecture builds, whereas the latter is the more general option that also works for single configuration (including 'configure auto') builds. commit 8f2fabec800a720b3e94b33c0048cc8c4ead436d Author: Devin Matthews Date: Wed Mar 14 17:43:42 2018 -0500 Make arm32 and arm64 families work. (#176) commit fc6a1842518a0820c6708c285611346d5a1419da Author: Field G. Van Zee Date: Wed Mar 14 15:31:17 2018 -0500 Print sub-configuration name in testsuite output. Details: - Added a line to the testsuite output that prints the name of the current/active sub-configuration. This is useful when linking the testsuite against multi-configuration builds because it confirms the sub-configuration that is actually being employed at runtime. Thanks to Devin Matthews for suggesting this feature. commit 9943a899d64bf7ec4a24106f6f4c70629bbe1f6e Merge: 290dd4a9 b1a15ae6 Author: Devin Matthews Date: Wed Mar 14 13:27:44 2018 -0500 Merge pull request #173 from devinamatthews/dev Fix Cortex-A9 and Cortex-A15 configs. commit b1a15ae6ee0f46c9a95cf59f9555925e0e8e21ff Author: Devin Matthews Date: Wed Mar 14 13:26:44 2018 -0500 Use BLIS_H_FLAT commit 290dd4a9feee447e69b40ad108954af78e196f7e Author: Field G. Van Zee Date: Wed Mar 14 13:15:37 2018 -0500 Allow arbitrarily deep configuration families. Details: - Updated configure so that configuration families specified in the config_registry are no longer constrained as being only one level deep. For example, previously the x86_64 family could not be defined concisely in terms of, say, intel64 and amd64 families, and instead had to be defined as containing "haswell, sandybridge, penryn, zen, etc." In other words, families were constrained to only having singleton configurations as their members. That constraint is now lifted. - Redefined x86_64 family in config_registry in terms of intel64 and amd64. commit 9cee78e006d56543ac02fc9c488905c0434e60ae Author: Devin Matthews Date: Wed Mar 14 13:09:48 2018 -0500 Fix Cortex-A9 and Cortex-A15 configs. Tested with QEMU. commit 1a3031740f7fcbbcc2c99d5c4cb50d0413407455 Author: Field G. Van Zee Date: Tue Mar 13 16:04:40 2018 -0500 Updates to ARM hardware detection support. Details: - Updated/clarified the ARM preprocessor macro branch of bli_cpuid.c. Going forward, cortexa57 (64-bit), cortexa15, and cortexa9 (32-bit) sub-configurations are supported. However, the functions that detect features specific to a15 and a9 are identical, and since a15 is tested first, it will always be chosen for arm32 hardware (even if both sub-configurations were enabled at configure-time and the library is linked and run on an a9). Thus, more work needs to be done to distinguish these two. - Added cpp guard around x86_64 portions of bli_cpuid.c. Now, either the x86_64 or ARM code will be compiled (or neither, if neither environment is detected). - In bli_arch_query_id(), call bli_cpuid_query_id() when the BLIS_FAMILY_ARM64 or BLIS_FAMILY_ARM32 macros are defined. - Added arm64 and arm32 configuration families to config_registry. - Added a note to the arch_t typedef enum in bli_type_defs.h reminding the developer to update the string array in bli_arch.c whenever new enum values are added or existing values are reordered. commit 1442d06886ebdc34d8f1cb620229ddc6062c2ce8 Author: Field G. Van Zee Date: Sun Mar 11 16:59:50 2018 -0500 Fixed misnamed kernels in _cntx_init_cortexa57.c. Details: - Changed incorrect kernel function names in bli_cntx_init_cortexa57.c: bli_sgemm_cortexa57_asm_8x12 -> bli_sgemm_armv8a_asm_8x12 bli_dgemm_cortexa57_asm_6x8 -> bli_dgemm_armv8a_asm_6x8 Thanks to Jacob Gorm Hansen for reporting this issue. commit 28bcea37dfcf0eb99a99da6f46de2a2830393d1d Merge: b1ea3092 8b0475a8 Author: praveeng Date: Fri Mar 9 19:13:08 2018 +0530 Merge master code till 06_mar_2018 to amd-staging Change-Id: I12267e5999c92417e3715fef4f36ac2131d00f1a commit 48da9f5805f0a49f6ad181ae2bf57b4fde8e1b0a Author: Field G. Van Zee Date: Wed Mar 7 12:54:06 2018 -0600 Tweaked common.mk, Makefile, skx/knl make_defs.mk. Details: - Reorganized linker-related section of common.mk so that LDFLAGS set in a sub-configuration's make_defs.mk file will not be immediately (and erroneously) overridden by the default values. - Re-enabled redirected (to file) output of the testsuite when run from the top-level Makefile via 'make test'. (For some reason, it was commented-out for the non-verbose case.) - Removed old/unnecessary code from the make_defs.mk files of skx and knl sub-configurations. commit 8b0475a87daa177916e2caac0e530c6a57fa07cf Author: Field G. Van Zee Date: Tue Mar 6 06:39:44 2018 -0600 Fixed typo in attempted fix in 1a8350f7. Details: - Mistakenly entered 148 as knl mc blocksize for double real when the value should have been 144. Thanks to Dave Love for reporting this. commit 8912e6886b97eabb4ce0c35a3609a0fd994d347b Author: Field G. Van Zee Date: Mon Mar 5 18:00:45 2018 -0600 Fixed missing flags during shared object build. Details: - Fixed a bug in common.mk that caused warning, position-independent code, miscellaneous, and general preprocessor flags to be omitted from the configuration family-specific variables that hold those values, as registered by the family's make_defs.mk file. This would most obviously manifest when targeting a configuration family such as 'intel64' while simultaneously configuring for a shared object build, as the key '-fPIC' flag would be omitted at compile-time and prevent successful linking. Thanks to Dave Love for reporting this bug. - Other cleanups to common.mk for readability and clarity. commit 1a8350f70557fc53ca0c2eadf2076710dd0d9bc9 Author: Field G. Van Zee Date: Mon Mar 5 13:32:00 2018 -0600 Fixed cache blocksize bug in knl configuration. Details: - Changed the mc blocksize for double real execution in the knl sub- configuration from 160 to 148. The old value was not a multiple of mr (which is 24), and thus the safeguards in bli_gks_register_cntx() were tripping. Thanks for Dave Love for reporting this issue. - Switch knl sub-configuration to use default blocksizes for datatypes not supported by native kernels. - Fixed typos in bli_error.c that prevented certain error strings (which report maximum cache blocksizes not being multiples of their corresponding register blocksize) from properly initializing. commit c09fffa827fe6241dc20193a1c404496664220de Author: Field G. Van Zee Date: Sat Mar 3 13:13:39 2018 -0600 Added missing cntx_t* arg in knl packm kernels. Details: - Added the missing cntx_t* argument to the function signature of packm kernels in kernels/knl/1m/. Thanks to Dave Love for reporting this issue. commit b1ea30925dff751eced23dfa94ff578a20ea0b94 Author: Field G. Van Zee Date: Fri Feb 23 17:42:48 2018 -0600 CHANGELOG update (0.3.0) Change-Id: Id038b00a62de51c9818ad249651ec5dc662f4415 commit 1ef9360b1fd0209fbeb5766f7a35402fbd080fcb Author: Field G. Van Zee Date: Thu Mar 1 14:36:39 2018 -0600 Enable non-unit vector stride tests by default. Details: - Change "vector storage schemes to test" parameter in testsuite's input.general file to "cj". This means that both unit stride column vectors and non-unit stride column vectors will be tested in operations with vector operands (e.g. level-1v, level-1f, level-2). - Very minor comment (typo) changes to input.operations. commit 8c4e55a1a1ead9a5e970200fee027ffd2c7e8454 Author: Field G. Van Zee Date: Wed Feb 28 17:01:47 2018 -0600 Added individual operation overrides in testsuite. Details: - Updated the testsuite driver so that setting one or more individual operation test switches to "2" in input.operations will enable ONLY those operations and disable all others, regardless of the values of the section overrides and other operation switches. This makes it every easy to quickly test only one or two operations, and equally easy to revert back to the previous combination of operation tests. - Added more comments to input.operations describing the use of individual "enable only" overrides. commit 34862aed89e5d5a8f35aeecd49f3052ada1f337b Author: Field G. Van Zee Date: Wed Feb 28 15:30:14 2018 -0600 Use zen kernels in haswell sub-configuration. Details: - Register use of level-1v zen intrinsic kernels for amaxv, axpyv, dotv, dotxv, and scalv, as well asl level-1f zen intrinsic kernels for axpyf and dotxf. This works because these kernels simply target AVX/AVX2, and therefore work without modification on haswell hardware. - Switch to use of zen microkernels in bli_cntx_init_haswell.c. The zen kernels are essentially identical to those used by haswell, except that now zen kernels are a bit more up-to-date. In the future, I may continue to maintain duplicates, or I may keep the kernels named after one architecture (zen or haswell) but used by both sub-configurations. - In config_registry, enable use of both haswell and zen kernels for the haswell sub-configuration. This is necessary in order to make zen kernels visible when registering kernels in bli_cntx_init_haswell.c. - Enable use of assembly-based complex gemm microkernels for zen, bli_cgemm_zen_asm_3x8() and bli_zgemm_zen_asm_3x4(), in bli_cntx_init_zen.c. This was actually intended for 1681333. commit 709f8361ebc90b96b02ebe5c5ffb6fc3b1b25e58 Author: Field G. Van Zee Date: Fri Feb 23 17:42:48 2018 -0600 Version file update (0.3.0) commit d9079655c9cbb903c6761d79194a21b7c0a322bc Author: Field G. Van Zee Date: Fri Feb 23 17:42:48 2018 -0600 CHANGELOG update (0.3.0) commit 3defc7265c12cf85e9de2d7a1f243c5e090a6f9d Author: Field G. Van Zee Date: Fri Feb 23 17:38:19 2018 -0600 Applied 34b72a3 to non-active/unused microkernels. Details: - Applied the read-beyond-bounds bugfix in 34b72a3 to other haswell and zen kernels (ie: other microtile shapes) which are not used by default. This was done mostly in case someone decided to pick up these kernels and start using them, not because it affects BLIS's behavior out-of-the-box. commit 34b72a351745aa0d47bb0b74ebcd0f0a616d613d Author: Field G. Van Zee Date: Fri Feb 23 16:33:32 2018 -0600 Fixed obscure read-beyond-bounds bug in sgemm ukrs. Details: - Fixed an obscure bug in the bli_sgemm_haswell_asm_6x16 and bli_sgemm_zen_asm_6x16 microkernels when the input/output matrix C is stored with general stride (ie: both rs and cs are non-unit). The bug was rooted in the way those microkernels read from matrix C-- namely, they used vmovlps/vmovhps instead of movss. By loading two floats at a time, even if one of them was treated as junk, the assembly code could be written in a more concise manner. However, under certain conditions--if m % mr == 0 and n % nr == 0 and the underlying matrix is not an internal "view" into a larger matrix-- this could result in the very last vmovhps of the last (bottom-right) microkernel invocation reading beyond valid memory. Specifically, the low 32 bits read would always be valid, but the high 32 bits could reside beyond the bounds of the array in which the output C matrix is contained. To remedy this situation, we now selectively use movss to load any element that could be the last element in the matrix. commit 5112e1859e7f8888f5555eb7bc02bd9fab9b4442 Author: Field G. Van Zee Date: Fri Feb 23 14:31:26 2018 -0600 Added missing 'restrict' to some kernels' cntx_t*. Details: - Added missing 'restrict' keyword to cntx_t* argument of function signatures corresponding to level-1v, level-1f, and level-1m kernels. This affected bli_l1v_ker_prot.h, bli_l1f_ker_prot.h, and bli_l1m_ker_prot.h. (The 'restrict' was already being used to qualify cntx_t* arguments for kernels defined in bli_l3_ker_prot.h.) - Added comments to bli_l1v_ker.h, bli_l1f_ker.h, bli_l1m_ker.h, and bli_l3_ukr.h that help explain how those headers function to produce kernel prototypes using the prototype macros defined in the files mentioned above. commit 1fa8af95d807168e0849adb668492601e7009be0 Merge: c084b03b 16813335 Author: Field G. Van Zee Date: Wed Feb 21 17:54:02 2018 -0600 Merge branch 'rt' commit c084b03b31d84427a120e391963db5419f1911ee Merge: 5d03b6e6 fa74af4e Author: Field G. Van Zee Date: Wed Feb 21 17:52:17 2018 -0600 Merge branch 'rt' commit 16813335bdb5978bc9a26cd00a32bd5a130130c4 Merge: fa74af4e 5a7005dd Author: Field G. Van Zee Date: Wed Feb 21 17:43:32 2018 -0600 Merge branch 'amd' into rt Details: - Merged contributions made by AMD via 'amd' branch (see summary below). Special thanks to AMD for their contributions to-date, especially with regard to intrinsic- and assembly-based kernels. - Added column storage output cases to microkernels in bli_gemm_zen_asm_d6x8.c and bli_gemmtrsm_l_zen_asm_d6x8.c. Even with the extra cost of transposing the microtile in registers, this is much faster than using the general storage case when the underlying matrix is column-stored. - Added s and d assembly-based zen gemmtrsm_u microkernel (including column storage optimization mentioned above). - Updated zen sub-configuration to reflect presence of new native kernels. - Temporarily reverted zen sub-configuration's level-3 cache blocksizes to smaller haswell values. - Temporarily disabled small matrix handling for zen configuration family in config/zen/bli_family_zen.h. - Updated zen CFLAGS according to changes in 1e4365b. - Updated haswell microkernels such that: - only one vzeroupper instruction is called prior to returning - movapd/movupd are used in leiu of movaps/movups for double-real microkernels. (Note that single-real microkernels still use movaps/movups.) - Added kernel prototypes to kernels/zen/bli_kernels_zen.h, which is now included via frame/include/bli_arch_config.h. - Minor updates to bli_amaxv_ref.c (and to inlined "test" implementation in testsuite/src/test_amaxv.c). - Added early return for alpha == 0 in bli_dotxv_ref.c. - Integrated changes from f07b176, including a fix for undefined behavior when executing the 1m method under certain conditions. - Updated config_registry; no longer need haswell kernels for zen sub-configuration. - Tweaked marginal and pass thresholds for dotxf. - Reformatted level-1v, -1f, and -3 amd kernels and inserted additional comments. - Updated LICENSE file to explicitly mention that parts are copyright UT-Austin and AMD. - Added AMD copyright to header templates in build/templates. Summary of previous changes from 'amd' branch. - Added s and d assembly-based zen gemm microkernels (d6x8 and d8x6) and s and d assembly-based zen gemmtrsm_l microkernels (d6x8). - Added s and d intrinsics-based zen kernels for amaxv, axpyv, dotv, dotxv, and scalv, with extra-unrolling variants for axpyv and scalv. - Added a small matrix handler to bli_gemm_front(), with the handler implemented in kernels/zen/3/bli_gemm_small_matrix.c. - Added additional logic to sumsqv that first attempts to compute the sum of the squares via dotv(). If there is a floating-point exception (FE_OVERFLOW), then the previous (numerically conservative) code is used; otherwise, the result of dotv() is square-rooted and stored as the result. This new implementation is only enabled when FE_OVERFLOW is #defined. If the macro is not #defined, then the previous implementation is used. - Added axpyv and dotv standalone test drivers to test directory. - Added zen support to old cpuid_x86.c driver in build/auto-detect/old. - Added thread-local and __attribute__-related macros to bli_macro_defs.h. commit 5d03b6e6e19d5a07f0cccf1a158f02fbd62dfd99 Author: Devin Matthews Date: Mon Feb 19 11:31:30 2018 -0600 Fix asm macro include line for KNL. Fixes #167. commit f07b176c84dc9ca38fb0d68805c28b69287c938a Author: Field G. Van Zee Date: Thu Feb 15 18:36:54 2018 -0600 Fixed an obscure bug in the 1m implementation. Details: - Fixed a bug in the way the bli_gemm1m_cntx_ref() function (defined in ref_kernels/bli_cntx_ref.c) initializes its context for 1m execution. Previously, the function probed the context that was in the process of being updated for use with 1m--this context being previously initialized/copied from a native context--for its storage preference to determine which "variant" (row- or column-oriented) of 1m would be needed. However, the _cntx_ref() function was not updating the method field of the context until AFTER this query, and the conditional which depended on it, had taken place, meaning the storage preference query function would mistakenly think the context was for native execution, since the context's method field would still be set to BLIS_NAT. This would lead it to incorrectly grab the storage preference of the complex domain microkernel rather than the corresponding real domain microkernel, which could cause the storage preference predicate to evaluate to the wrong value, which would lead to the _cntx_ref() function choosing the wrong variant. This could lead to undefined behavior at runtime. The method is now explicitly set within the context prior to calling the storage preference query function. - Updated comments in frame/ind/oapi/bli_l3_3m4m1m_oapi.c. - Fixed a typo in the commented-out CFLAGS in config/zen/make_defs.mk, which are appropriate for gcc 6.x and newer. (Mistakenly used -march=bdver4 instead of -march=znver1.) commit 1f94bb7b96eb2b67257e6c4df89e29c73e9ab386 Author: Field G. Van Zee Date: Fri Jan 19 12:46:53 2018 -0600 Document how to enable zen-specific instructions. Details: - Added as a comment in config/zen/make_defs.mk the list of compiler flags that could be added to manually enable the instructions provided by the Zen microarchitecture that are not already implied by -march=bdver4. This information, along with the previous commit's flags to selectively disable Bulldozer instructions no longer present in Zen, was gathered from [1]. I hesitate to enable use of these instructions since I don't have any Zen hardware to test on yet. [1] https://wiki.gentoo.org/wiki/Ryzen commit 1e4365b21bafa02bd108c5ac4705a25671fb9441 Author: Field G. Van Zee Date: Thu Jan 18 12:03:51 2018 -0600 Augment zen CFLAGS to prevent illegal instruction. Details: - Added various compiler flags (-mno-fma4 -mno-tbm -mno-xop -mno-lwp) so that compiling with -march=bdver4 on zen-based architectures does not result in an illegal instruction error at runtime. Note: This fix is only needed for gcc 5.4; gcc 6.3 or later supports the use of -march=znver1, which can be used in lieu of the augmented set of flags based on bdver4. Thanks to Nisanth Padinharepatt for reporting this error. commit fa74af4e1fa7385ac3f3089fe1ea7bb88c906029 Author: Field G. Van Zee Date: Tue Jan 9 13:43:15 2018 -0600 Minor labeling update for './configure -c' output. Details: - Print the name of the configuration in the output of the kernel-to-config map (and chosen pairs list) as a subtle way to remind the user that these only apply to the targeted configuration (whereas the config list and kernel list are printed without regard to which configuration was actually targeted). commit 5cdea756c7391e2c6cbfb38436ef9a205f860237 Merge: 9d8858b5 1e7a4896 Author: Field G. Van Zee Date: Sun Jan 7 19:45:20 2018 -0600 Merge branch 'rt' commit 9d8858b5cff4a4b078b87872847a5710073fff0a Merge: 0b3ca3cf f7df64da Author: Devin Matthews Date: Sun Jan 7 10:03:25 2018 -0600 Merge pull request #164 from devinamatthews/master Don't use memkind for skx configuration. commit f7df64daf6bbe6431effada6e13d8d1fab5aa221 Author: Devin Matthews Date: Sun Jan 7 09:37:25 2018 -0600 Don't use memkind for skx configuration. Fixes #163. commit 1e7a4896e0cbe73c4685fa956278e3f28273cdf9 Author: Field G. Van Zee Date: Fri Jan 5 12:33:48 2018 -0600 Minor error handling in update-version-file.sh. Details: - Added explicit handling of situations when 'git describe --tags' returns an error. This command is used by update-version-file.sh when deciding whether or not to update the version file prior to configuration. - Removed bli_packm.c and bli_unpackm.c, as they contained no source code. commit 0b3ca3cfb682715a3686fd93ebb10d4a695d1162 Author: Field G. Van Zee Date: Thu Jan 4 20:51:35 2018 -0600 Intelligently select compiler for auto-detection. Details: - Rewrote code that selects the compiler for the purposes of compiling the auto-detection executable. CC (if specified) is tried first. Then gcc. Then clang. The absolute fallback is cc. The previous code was sort of broken, and seemed to unintentionally always use gcc. - Moved various configuration-agnostic flags from config/*/make_defs.mk files to common.mk. The new mechanism appends the configuration- agnostic flags to the various compiler flag variables initialized in make_defs.mk. Flags specific to the sub-configuration are still set in make_defs.mk. - Added -Wno-tautological-compare to CMISCFLAGS when clang is in use. Also added the flag to the compiler instantiation during configure- time hardware detection (when clang is selected). - Added some missing (but mostly-optional) quotes to configure script. commit 5a7005dd44ed3174abbe360981e367fd41c99b4b Merge: 7be88705 3bc99a96 Author: Nisanth M P Date: Wed Jan 3 12:05:12 2018 +0530 Merge changes in AMD beta release 0.95 into amd branch commit 0b9c5127e91508c115228ca604ee2dac8de8f477 Author: Field G. Van Zee Date: Sat Dec 23 15:53:44 2017 -0600 Enabled C99, added stdint.h to auto-detect build. Details: - Added "-std=c99" to compiler arguments when building auto-detection driver in configure script. - Added #include to all three source files needed by auto- detection program. commit 0ce5e19c318e04909d3e664d69accb3a0fc6b988 Author: Field G. Van Zee Date: Sat Dec 23 15:32:03 2017 -0600 Reimplemented configure-time hardware detection. Details: - Reimplemented the hardware detection functionality invoked when running "./configure auto". Previously, a standalone script in build/auto-detect that used CPUID was used. However, the script attempted to enumerate all models for each microarchitecture supported. The new approach recycles the same code used for runtime hardware detection introduced in 2c51356. This has two immediate benefits. First, it reduces and consolidates the code required to detect microarchitectures via the CPUID instruction. Second, it provides an indirect way of testing at configure-time the code that is used to detect hardware at runtime. This code is (a) only activated when targeting a configuration family (such as intel64 or amd64) at configure-time and (b) somewhat difficult to test in practice, since it relies on having access to older microarchitectures. - The above change required placing conditional cpp macro blocks in bli_arch.c and bli_cpuid.c which either #include "blis.h" or #include a bare-bones set of headers that does not rely on the presence of a bli_config.h header. This is needed because bli_config.h has not been created yet when configure-time auto-detection takes places. - Defined a new function in bli_arch.c, bli_arch_string(), which takes an arch_t id and returns a pointer to a string that contains the lowercase name of the corresponding microarchitecture. This function is used by the auto-detection script to printf() the name of the sub-configuration corresponding to the detected hardware. commit 9804adfd405056ec332bb8e13d68c7b52bd3a6c1 Author: Field G. Van Zee Date: Thu Dec 21 19:22:57 2017 -0600 Added option to disable pack buffer memory pools. Details: - Added a new configure option, --[en|dis]able-packbuf-pools, which will enable or disable the use of internal memory pools for managing buffers used for packing. When disabled, the function specified by the cpp macro BLIS_MALLOC_POOL is called whenever a packing buffer is needed (and BLIS_FREE_POOL is called when the buffer is ready to be released, usually at the end of a loop). When enabled, which was the status quo prior to this commit, a memory pool data structure is created and managed to provide threads with packing buffers. The memory pool minimizes calls to bli_malloc_pool() (i.e., the wrapper that calls BLIS_MALLOC_POOL), but does so through a somewhat more complex mechanism that may incur additional overhead in some (but not all) situations. The new option defaults to --enable-packbuf-pools. - Removed the reinitialization of the memory pools from the level-3 front-ends and replaced it with automatic reinitialization within the pool API's implementation. This required an extra argument to bli_pool_checkout_block() in the form of a requested size, but hides the complexity entirely from BLIS. And since bli_pool_checkout_block() is only ever called within a critical section, this change fixes a potential race condition in which threads using contexts with different cache blocksizes--most likely a heterogeneous environment--can check out pool blocks that are too small for the submatrices it wishes to pack. Thanks to Nisanth Padinharepatt for reporting this potential issue. - Removed several functions in light of the relocation of pool reinit, including bli_membrk_reinit_pools(), bli_memsys_reinit(), bli_pool_reinit_if(), and bli_check_requested_block_size_for_pool(). - Updated the testsuite to print whether the memory pools are enabled or disabled. commit 107801aaae180c00022f1b990bc59038c14949d2 Merge: d9c05745 0084531d Author: Field G. Van Zee Date: Mon Dec 18 16:29:28 2017 -0600 Merge branch 'master' into selfinit commit 0084531d3eea730a319ecd7018428148c81bbba7 Author: Field G. Van Zee Date: Sun Dec 17 18:58:25 2017 -0600 Updated flatten-headers.py for python3. Details: - Modifed flatten-headers.py to work with python 3.x. This mostly amounted to removing print statements (which I replaced with calls to my_print(), a wrapper to sys.stdout.write()). Thanks to Stefan Husmann for pointing out the script's incompatibility with python 3. - Other minor changes/cleanups. commit 90b11b79c302f208791bdfb1ed754873103c7ce5 Author: Field G. Van Zee Date: Sun Dec 17 17:34:32 2017 -0600 Modest performance boost to flatten-headers.py. Details: - Updated flatten-headers.py to pre-compile the main regular expression used to isolate #include directives and the header filenames they reference. The compiled regex object is then used over and over on each header file in the tree of referenced headers. This appears to have provided a 1.7-2x performance increase in the best case. - Other minor tweaks, such as renaming the main recursive function from replace_pass() to flatten_header(). commit 99dee87f30b4d437fa6b5e4ba862526d07b9f08b Author: Field G. Van Zee Date: Sun Dec 17 16:47:27 2017 -0600 Reimplemented flatten-headers.sh in python. Details: - Added flatten-headers.py, a python implementation of the bash script flatten-headers.sh. The new script appears to be 25-100x faster, depending on the operating system, filesystem, etc. The python script abides by the same command line interface as its predecessor and targets python 2.7 or later. (Thanks to Devin Matthews for suggesting that I look into a python replacement for higher performance.) - Activated use of flatten-headers.py in common.mk via the FLATTEN_H variable. - Made minor tweaks to flatten-headers.sh such as spelling corrections in comments. commit d9c0574599c3f97c0f9b6c334a077bab9452e1f4 Author: Field G. Van Zee Date: Thu Dec 14 17:13:42 2017 -0600 Allow travis failures of OS X builds that run testsuite. Details: - Added an allowance for OS X builds that run the testsuite to fail. There seems to be an issue with 1m when running in Travis CI under OS X and clang, but only in double-precision. Haven't been able to reproduce the error on my own, and thus, I can't debug it. (Hopefully it is simply a version-specific compiler bug.) commit 86cd23b7379b00a42b4ecc04fa668f1e3f9b54ee Author: Field G. Van Zee Date: Thu Dec 14 15:47:41 2017 -0600 Fixed testsuite Makefile brokenness from 9091a207. Details: - Fixed a makefile error encountered when building the testsuite directly in its directory (as opposed to indirectly via 'make test'). The fix involves introducing a new variable, BUILD_PATH, alongside the existing DIST_PATH variable. By default, BUILD_PATH is set to the current directory, and is overridden by other Makefiles used by, for example, the testsuite and standalone test drivers in testsuite or test, respectively. - Some files/directories in common.mk were redefined in terms of BUILD_DIR, such as the locations of config.mk file and the intermediate include directory. commit 6a3a8924c04d25507fc4aa593df30c56c7dc12f7 Author: Field G. Van Zee Date: Thu Dec 14 13:20:02 2017 -0600 Temporarily show Makefile's testsuite output. Details: - Disabled redirection of testsuite output for 'test' target. This is part of an attempt to debug a segmentation fault on OS X via Travis. commit 9a01080dd426915bed18229f70401bfa639dc283 Merge: 83316485 a32e8a47 Author: Field G. Van Zee Date: Thu Dec 14 11:27:19 2017 -0600 Merge branch 'master' into selfinit commit a32e8a47c022b6071302b2956af5728976c83ca9 Author: Field G. Van Zee Date: Wed Dec 13 16:31:36 2017 -0600 Added an exclusion to .travis.yml. Details: - Added exclusion for out-of-tree builds on OS X (clang). commit b9f7d987df548965c86e16e0ba94d5cad0d9b399 Author: Field G. Van Zee Date: Wed Dec 13 16:22:09 2017 -0600 Cleaned up after previous travis oot debugging. Details: - Removed debugging output from common.mk related to Travis CI out-of-tree builds. - Other minor cleanups to common.mk. commit 9091a207aa8c49e279676ea02be533480b3b0d5a Author: Field G. Van Zee Date: Wed Dec 13 16:12:34 2017 -0600 Attempted fix to travis oot build failure. Details: - Found the likely cause of the Travis CI out-of-tree build failures: config.mk was being read from DIST_PATH, rather than the current directory. commit c01c71c33e236e6c91f5ddd3ec1e3faec89368c1 Author: Field G. Van Zee Date: Wed Dec 13 15:58:50 2017 -0600 Added debugging output to Makefile. Details: - Added $(info ...) statements in key locations in an attempt to reveal why Travis CI doesn't like building BLIS out-of-tree. commit 784289d69dd6b3692444d3b3e290f6a014465b72 Author: Field G. Van Zee Date: Wed Dec 13 15:31:27 2017 -0600 Updated SHELL in common.mk from /bin/bash to bash. commit d9bb1d1d4ebc89ea75d9d927d09882162a914f77 Author: Field G. Van Zee Date: Wed Dec 13 15:27:54 2017 -0600 Defined SHELL in common.mk so "echo -n" works. Details: - Defined the SHELL variable in common.mk as "/bin/bash" so that the -n option can be used with echo in the Makefile rule for flattening blis.h. Thanks to Devin Matthews for suggesting this fix. commit 9289a08667df2044f3a37af54d893efe2b56d555 Author: Field G. Van Zee Date: Wed Dec 13 15:14:27 2017 -0600 Attempt 3 on .travis.yml. commit 720bfcf0ef54fdc41df0dcaa94503edb0d5c8972 Author: Field G. Van Zee Date: Wed Dec 13 14:52:28 2017 -0600 More fixes to .travis.yml. Details: - Fixed a mistake (hopefully) in d0c4dd0 that resulted in many more osx/clang sub-tests than intended. - Shortened the variable names in an effort to make them more readable via the Travis CI web interface. commit 8717c9c97fe9b1ecd3b3192049a73976f8390ca7 Author: Field G. Van Zee Date: Wed Dec 13 14:36:37 2017 -0600 Added 'pwd' commands to .travis.yml for debugging. Details: - Added 'pwd' commands to the script portion of the .travis.yml file in an attempt to uncover the problem with the recent out-of-tree build testing changes made in d0c4dd0. commit 83316485ce10f6fcafe92a1c146282de0dd8068a Author: Field G. Van Zee Date: Wed Dec 13 14:14:50 2017 -0600 Simplified/fixed self-initialization. Details: - Fixed a race condition in self-initialization whereby the bli_is_init static variable could be erroneously read as TRUE by thread 1 while thread 0 is still executing bli_init_apis(), thus allowing thread 1 to use the library before it is actually ready. Thanks to to Minh Quan Ho and Devin Matthews for pointing out this issue. - Part of the solution to the aforementioned race condition was involved replacing the runtime initialization of the global scalar constants (e.g., BLIS_ONE, BLIS_ZERO, etc.) in bli_const.c with a static initialization of those same constants. This eliminates the need for bli_const_init() altogether. (The static initialization is made concise via preprocess macros.) - Defined bli_gks_query_cntx_noinit(), which behaves just like bli_gks_query_cntx(), except that it does not call bli_init_once(). This function is called in lieu of bli_gks_query_cntx() in bli_ind_init() and bli_memsys_init() so as to not result in any recursion into bli_init_once(). - Removed BLIS_ONE_HALF, BLIS_MINUS_ONE_HALF global scalar constants. They have no use in BLIS or its test products, and we have little reason to believe they are used by others. - Removed testsuite/out file, which was accidentally committed as part of 70640a3. commit 6526d1d4ae6dbfa854ca8d1e5f224cd6ab3fa958 Author: Field G. Van Zee Date: Tue Dec 12 13:50:43 2017 -0600 Added temp_dir argument to flatten-headers.sh. Details: - Added "temp_dir" argument to flatten-headers.sh so that the caller can specify where intermediate files should be created as the script runs. - Updated flatten-headers.sh to create intermediate files in temp_dir instead of alongside the corresponding source files. This should now (once again) allow out-of-tree builds where the BLIS distribution is read-only, or where the out-of-tree build is running concurrently with another out-of-tree build. (Thanks to Devin Matthews for pointing out the possibility of simultaneous out-of-tree builds.) commit 94755017c967630daf2e31c1f63ed5e88ab0d6ab Merge: d0c4dd00 5cf7b0c4 Author: Field G. Van Zee Date: Tue Dec 12 12:50:41 2017 -0600 Merge branch 'master' of github.com:flame/blis commit d0c4dd000ff38acc249e8acf7e0655a523991695 Author: Field G. Van Zee Date: Tue Dec 12 12:47:53 2017 -0600 Added out-of-tree build test to .travis.yml file. Details: - Modified .travis.yml file to include an out-of-tree build test (using the "auto" configure target). Thanks to Devin Matthews for this suggestion. commit 5cf7b0c4e52922069183a87dc2aa177419644e04 Author: Devin Matthews Date: Tue Dec 12 12:38:48 2017 -0600 Ignore blis.h.interm [ci skip] commit 8d8ff74d15b4a584929cec36034ba6d3c53f7d27 Author: Field G. Van Zee Date: Tue Dec 12 12:32:50 2017 -0600 Further attempt to fix out-of-tree builds. Details: - Fix applied in 87978f6 was necessary but not sufficient to fix out-of-tree builds. It turns out that using a source tree that had already built the target erroneously gave the impression that out-of-tree builds were working again, when in fact they were still broken. The additional changes in this commit should complete the fix that was started in the aforementioned commit. Thanks to Devin Matthews and Shaden Smith for their help in isolating this issue. commit 70640a37109290b57c344083c00624e13c496e30 Author: Field G. Van Zee Date: Mon Dec 11 17:18:43 2017 -0600 Implemented library self-initialization. Details: - Defined two new functions in bli_init.c: bli_init_once() and bli_finalize_once(). Each is implemented with pthread_once(), which guarantees that, among the threads that pass in the same pthread_once_t data structure, exactly one thread will execute a user-defined function. (Thus, there is now a runtime dependency against libpthread even when multithreading is not enabled at configure-time.) - Added calls to bli_init_once() to top-level user APIs for all computational operations as well as many other functions in BLIS to all but guarantee that BLIS will self-initialize through the normal use of its functions. - Rewrote and simplified bli_init() and bli_finalize() and related functions. - Added -lpthread to LDFLAGS in common.mk. - Modified the bli_init_auto()/_finalize_auto() functions used by the BLAS compatibility layer to take and return no arguments. (The previous API that tracked whether BLIS was initialized, and then only finalized if it was initialized in the same function, was too cute by half and borderline useless because by default BLIS stays initialized when auto-initialized via the compatibility layer.) - Removed static variables that track initialization of the sub-APIs in bli_const.c, bli_error.c, bli_init.c, bli_memsys.c, bli_thread, and bli_ind.c. We don't need to track initialization at the sub-API level, especially now that BLIS can self-initialize. - Added a critical section around the changing of the error checking level in bli_error.c. - Deprecated bli_ind_oper_has_avail() as well as all functions bli__ind_get_avail(), where is a level-3 operation name. These functions had no use cases within BLIS and likely none outside of BLIS. - Commented out calls to bli_init() and bli_finalize() in testsuite's main() function, and likewise for standalone test drivers in 'test' directory, so that self-initialization is exercised by default. commit 70a64432ee5a7adbee10fb7ff6d7b608c1940a7a Author: Field G. Van Zee Date: Mon Dec 11 13:14:20 2017 -0600 Fixed off-by-one indexing in bli_cpuid.c. Details: - In bli_cpuid.c, fixed an off-by-one indexing statement in vpu_count() whereby a string-terminating NULL character, '\0', is written beyond the bounds of the model_num string. - Minor whitespace and formatting edits to bli_cpuid.c. commit 87978f6261a080d261d01f9acf4e9cc18855c833 Author: Field G. Van Zee Date: Mon Dec 11 12:49:03 2017 -0600 Fixed broken out-of-tree builds since 52f9e6f. Details: - Added missing $(DIST_PATH)/ prefix to relative path to flatten-headers.sh script in common.mk so that the script could be found during out-of-tree builds. Thanks to Devin Matthews for reporting this bug. commit 513ef4d040f89a18dda5154e8c4cf1aaf7463999 Author: Field G. Van Zee Date: Mon Dec 11 12:35:59 2017 -0600 Various typecasting fixes, mis-typed enums, etc. Details: - Fixed implicit typecasting of conj_t to trans_t in bli_[un]packm_cxk.c. - Properly typecast integer arguments to match format specifier in various calls to printf() in bli_l3_thrinfo.c, bli_cntx.c, bli_pool.c, and bli_util_oapi.c. - Fixed "unsigned less-than-comparison with zero" checks in bli_check.c, bli_cntx.h. - Fixed mis-typed enums in bli_cntx.c (e.g., l1mkr_t that should have been l1fkr_t or l1vkr_t). - Fixed instances of opid_t value BLIS_GEMM that should have been l3ukr_t value BLIS_GEMM_UKR in bli_cntx_ref.c. - NOTE: These issues were identified via compiler warnings when building BLIS with clang on a rather old installation of OS X: $ clang --version Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn) Target: x86_64-apple-darwin15.2.0 Thread model: posix commit 3bc99a96a3648f51b9acdc8a8c7e1cf4eb815459 Merge: 3a441183 78199c53 Author: prangana Date: Mon Dec 11 12:53:03 2017 +0530 Fix merge conflicts after rebase with release branch Change-Id: I581b26c6d515f717ff0dce91c7c0c92553aa2630 commit 3a44118398955d6f872e01f73ae5bb4a4f8500f7 Author: Nisanth M P Date: Wed Nov 15 11:11:17 2017 +0530 Added AMD copyright line to the changed files in last 3 commits Change-Id: I37d5dbbbe1b199e07529610a5e9cc9e49d067c66 commit 268a56c06e94d1c388766dbfe81d54efbe432809 Author: Field G. Van Zee Date: Wed Nov 1 11:51:41 2017 -0500 Revert to default SIMD alignment for bulldozer. Details: - Removed the default-overriding #define of BLIS_SIMD_ALIGN_SIZE set in config/bulldozer/bli_kernel.h. Not sure where this value came from, but it would seem to allow for insufficient starting address alignment for any matrices created via bli_malloc_user(), such as via bli_obj_create(). Thanks to Rene Sitt for reporting the behavior that led us to this bug. - This commit is a manual patch of the same fix made to the 'rt' branch in 8f150f2. commit 510a6863e28277f9446abfb77f1aea9f01d37e7a Author: Devin Matthews Date: Mon Oct 30 10:04:42 2017 -0500 Fix CVECFLAGS for bulldozer config. commit c669716790bdda5d2b11ea0a026cbc121b228842 Author: Nisanth M P Date: Tue Oct 24 16:36:36 2017 +0530 Adding __attribute__((constructor/destructor)) for CLANG case. CLANG supports __attribute__, but its documentation doesn't mention support for constructor/destructor. Compiling with clang and testing shows that it does support this. Change-Id: Ie115b20634c26bda475cc09c20960d687fb7050b commit 24e64a9d0877d788357fc63d4b947e977f8697f7 Author: Field G. Van Zee Date: Wed Oct 18 13:41:25 2017 -0500 Removed a duplicate bli_avx512_macros.h header. Details: - Removed a duplicate header file that was causing problems during installation for the 'knl' configuration. Thanks to Victor Eijkhout for reporting this issue. commit 9c0a3c4c0260cbfefb9f11532f46508b4fd19ec2 Author: Nisanth M P Date: Mon Oct 16 22:06:57 2017 +0530 Thread Safety: Move bli_init() before and bli_finalize() after main() BLIS provides APIs to initialize and finalize its global context. One application thread can finalize BLIS, while other threads in the application are stil using BLIS. This issue can be solved by removing bli_finalize() from API. One way to do this is by getting bli_finalize() to execute by default after application exits from main(). GCC supports this behaviour with the help of __attribute__((destructor)) added to the function that need to be executed after main exits. Similarly bli_init() can be made to run before application enters main() so that application need not call it. Change-Id: I7ce6cfa28b384e92c0bdf772f3baea373fd9feac commit 83f31253eb21c5ecd8a5907835e57720daae0b8b Author: Nisanth M P Date: Mon Oct 16 21:07:50 2017 +0530 Thread safety: Make the global induced method status array local to thread BLIS retains a global status array for induced methods, and provides APIs to modify this state during runtime. So, one application thread can modify the state, before another starts the corresponding BLIS operation. This patch solves this issue by making the induced method status array local to threads. Change-Id: Iff59b6f473771344054c010b4eda51b7aa4317fe commit e923402e68029be379a4297de3ac6fb155ffd928 Author: sthangar Date: Thu Sep 28 12:15:36 2017 +0530 The inner loop paralleization is turned off by default, the JR and IR loop parameters are set to 1 by default Change-Id: I8c3c2ecbbd636259f6ffb92768ec04148205c3e5 commit a64c15de19327c7595376d699be676c7003e850e Author: Field G. Van Zee Date: Tue Sep 26 19:02:53 2017 -0500 Fixed a pthread typo in previous commit. Details: - Misnamed 'pthread_mutex_t' type in bli_memsys.c as 'thread_mutex_t'. commit 42dcd589c37e1a2473ab2e1539207da97aebc07f Author: Field G. Van Zee Date: Tue Sep 26 17:00:04 2017 -0500 Fixed bugs in gemm/gemmtrsm ukr tests in testsuite. Details: - Fixed a bug in gemmtrsm test module that was due to improper partitioning into a k x k triangular matrix for the purposes of obtaining an mr x k micropanel of A with which to test. - Fixed a bug in gemm and gemmtrsm test modules that would only manifest for very large k (depending on the product of mr x kc on that architecture). The bug arose from the fact that the test module was triggering the allocation of blocks from the internal memory pools, which are limited in size. This allocation imposes an implicit assumption that the micro- panel being tested with will fit inside, and this assumption is violated for large values of k. Arbitrarily large k may now be tested for both operation tests. - Added OpenMP/pthread critical sections around the setting or getting of statuses from the induced method operation lookup table in bli_l3_ind.c. - Added the 'static' keyword to all pthread_mutex_t global variables in BLIS. - Thanks to Nisanth Padinharepatt of AMD for reporting the first and third issues. commit 206beb68ff73b75f5c382413967aacbb8a0aac3a Author: Field G. Van Zee Date: Sat Sep 9 14:10:15 2017 -0500 Updated bibtex info for BLIS5 (3m4m) article. commit 0c8c0363aeb1f4aa88f7ec2d02403dab05a6e014 Author: sthangar Date: Mon Aug 28 16:44:42 2017 +0530 Bug fix for the testsuite build failing Change-Id: I7cd8c9d187387c48b2564e45cbfb8df985e93d77 commit 63d1c84465b50f64787808dd3e8494e683c16821 Author: sthangar Date: Wed Aug 23 13:01:14 2017 +0530 Adding auto hardware detection for Zen Change-Id: I40ce6705dd66b35000c4ccddffad1c5b65998caf commit 537fb2a895b09be94b11947696fd2da629be24dd Author: Devin Matthews Date: Tue Aug 15 10:02:25 2017 -0500 Add vzeroupper to Intel AVX kernels. commit 7628de3f76f78a44788807605a4601ddda445854 Author: Field G. Van Zee Date: Thu Aug 10 16:24:28 2017 -0500 Removed trailing enum commas from bli_type_defs.h. Details: - Removed trailing commas from enums in bli_type_defs.h. Thanks to Erling Andersen for pointing out this inconsistency and suggesting the change. commit a666fd4e267ffae3d4b21f38d569c61ff56adc9e Author: Field G. Van Zee Date: Sat Aug 5 13:04:31 2017 -0500 Added edge handling to _determine_blocksize_b(). Details: - Added explicit handling of situations where i == dim to bli_determine_blocksize_b_sub(). This isn't actually needed by any current use case within BLIS, but handling the situation is nonetheless prudent. Thanks to Minh Quan for reporting this issue and requesting the fix. commit 0c8afa546d7f33760415519ba328d7c49eb7aa06 Author: Field G. Van Zee Date: Fri Aug 4 14:17:44 2017 -0500 Fixed a minor bug in level-3 packm management. Details: - Fixed a bug in bli_l3_packm() that caused cntl_t-cached packed mem_t entries to be released and then re-acquired unnecessarily. (In essence, the "<" operands in the conditional that guards the release-and-reacquire code block simply needed to be swapped.) The bug should have only affected performance (rather than the computed result). Thanks to Minh Quan for identifying and reporting the bug. commit 6cf68a185d83fa46d438fcef65258ace78e24b13 Author: Devin Matthews Date: Mon Jul 31 15:19:51 2017 -0500 Change lsame_ signature to match lapacke. commit 6a9bd97295cc4fb1cbcd28f69824a43c073c9a76 Author: Field G. Van Zee Date: Sat Jul 29 20:17:05 2017 -0500 Fixed pthreads compile bug with previous commit. Details: - Erroneously passed family parameter into l3int_t function despite that function not taking the parameter. Oops. commit 95adc43d800431dc0a02ca83a51426dbef641ad6 Author: Field G. Van Zee Date: Sat Jul 29 14:53:39 2017 -0500 Moved 'family' field from cntx_t to cntl_t. Details: - Removed the family field inside the cntx_t struct and re-added it to the cntl_t struct. Updated all accessor functions/macros accordingly, as well as all consumers and intermediaries of the family parameter (such as bli_l3_thread_decorator(), bli_l3_direct(), and bli_l3_prune_*()). This change was motivated by the desire to keep the context limited, as much as possible, to information about the computing environment. (The family field, by contrast, is a descriptor about the operation being executed.) - Added additional functions to bli_blksz_*() API. - Added additional functions to bli_cntx_*() API. - Minor updates to bli_func.c, bli_mbool.c. - Removed 'obj' from bli_blksz_*() API names. - Removed 'obj' from bli_cntx_*() API names. - Removed 'obj' from bli_cntl_*(), bli_*_cntl_*() API names. Renamed routines that operate only on a single struct to contain the "_node" suffix to differentiate with those routines that operate on the entire tree. - Added enums for packm and unpackm kernels to bli_type_defs.h. - Removed BLIS_1F and BLIS_VF from bszid_t definition in bli_type_defs.h. They weren't being used and probably never will be. commit a98e4aa547f61ab09dd91d11478c2a2ef9882e11 Author: Devin Matthews Date: Thu Jul 20 14:50:13 2017 -0500 Clang can't make up it's mind what to support. commit 32eb36c3e8c2add2528514272044de16faed0c8f Author: Devin Matthews Date: Thu Jul 20 12:54:58 2017 -0500 Add default #define for __has_extension. commit 2a9aa134f7c29d3d4fdc160022ff257e61885a95 Author: Devin Matthews Date: Thu Jul 20 10:04:34 2017 -0500 Add fallbacks to __sync_* or __c11_atomic_* builtins when __atomic_* is not supported. Fixes #143. commit 6f07a034d575e1e9e30bb6417b8fcb77cf301297 Author: Field G. Van Zee Date: Wed Jul 19 15:40:48 2017 -0500 Updated ar option list used by all configurations. Details: - Dropped 'u' from the list of modifiers passed into the library archiver ar. Previously, "cru" was used, while now we employ only "cr". This change was prompted by a warning observed on Ubuntu 16.04: ar: `u' modifier ignored since `D' is the default (see `U') This caused me to realize that the default mode causes timestamps to be zero, and thus the 'u' option, which causes only changed object files to be inserted, is not applicable. commit 32bc03f9eed8795cfd2f2615d1c9f8673e039c57 Author: Field G. Van Zee Date: Wed Jul 19 13:51:53 2017 -0500 Added --force-version=STRING option to configure. Details: - Added an option to configure that allows the user to force an arbitrary version string at configure-time. The help text also now describes the usage information. - Changed the way the version string is communicated to the Makefile. Previously, it was read into the VERSION variable from the 'version' file via $(shell cat ...). Now, the VERSION variable is instead set in config.mk (via a configure-substituted anchor from config.mk.in). commit befaee6dd8b2a72de9e0461fe2ec1f36e9f88f3c Author: Field G. Van Zee Date: Tue Jul 18 17:56:00 2017 -0500 Updated openmp/pthread barriers with GNU atomics. Details: - Updated the non-tree openmp and pthreads barriers defined in bli_thrcomm_openmp.c and bli_thrcomm_pthreads.c to instead call a common implementation in bli_thrcomm.c, bli_thrcomm_barrier_atomic(). This new implementation goes through the same motions as the previous codes, but protects its loads and increments with GNU atomic built-ins. These atomic statements take memory ordering parameters that allow us to specify just enough constraints for the barrier to work as intended on weakly-ordered hardware. The prior implementation was only guaranteed to work on systems with strongly- ordered memory. (Thanks to Devin Matthews for suggesting this change and his crash-course in atomics and memory ordering.) - Removed 'volatile' from structs' barrier field declarations in bli_thrcomm_*.h. - Updated bli_thrcomm_pthread.? files to use renamed struct barrier fields consistent with that of the _openmp.? files. - Updated other bli_thrcomm_* files to rename "communicator" variables to simply "comm". commit 8f739cc847fcff2ddeeb336f8b2b9d080eb16f6c Author: Field G. Van Zee Date: Mon Jul 17 19:03:22 2017 -0500 Added API to set mt environment variables. Details: - Renamed bli_env_get_nway() -> bli_thread_get_env(). - Added bli_thread_set_env() to allow setting environment variables pertaining to multithreading, such as BLIS_JC_NT or BLIS_NUM_THREADS. - Added the following convenience wrapper routines: bli_thread_get_jc_nt() bli_thread_get_ic_nt() bli_thread_get_jr_nt() bli_thread_get_ir_nt() bli_thread_get_num_threads() bli_thread_set_jc_nt() bli_thread_set_ic_nt() bli_thread_set_jr_nt() bli_thread_set_ir_nt() bli_thread_set_num_threads() - Added #include "errno.h" to bli_system.h. - This commit addresses issue #140. - Thanks to Chris Goodyer for inspiring these updates. commit 10163833075fd42be5b5b503acc855f91a484cfd Author: Marat Dukhan Date: Thu Jul 13 21:39:24 2017 -0700 Fix Emscripten builds commit c09b30d115eade72f44f37bf90aa848c9c0e79af Author: Minh Quan HO Date: Fri Jul 7 10:52:05 2017 +0200 set missing free_fp in bli_membrk_init for free-ing GEN_USE buffers The membrk's free_fp is called when releasing GEN_USE buffers, but this free_fp is not set in bli_membrk_init commit 997628ed9793c72e9ef576dd8d715cfec27c4862 Author: sthangar Date: Fri Jun 30 12:23:19 2017 +0530 Reducing the framework overhead of GEMV routines Change-Id: I83607ad767bff74e305e915b54b0ea34ec3e5684 commit ee869066168239b710ad9938bb0e1ae454883f3a Author: Kiran Varaganti Date: Tue Jul 4 12:57:32 2017 +0530 Improved efficiency of dGEMM for large matrices by reducing TLB load misses and majorly L3 cache misses. This is achieved by changing the packed block sizes of matrix A & B. Now the optimum values are MC_D = 510 and KC_D = 1024. Change-Id: I2d8bdd5f62f2d1f8782ae2997f3d7a26587d1ca4 commit 7b933b90b1859c96de49a402d48de82909bc73e5 Author: Devin Matthews Date: Tue Jun 6 20:23:17 2017 -0500 Add new SSI acknowledgment commit 3485abba4b426fbf42b146a9611a0841f6d236c6 Author: sthangar Date: Wed May 24 11:48:16 2017 +0530 Checked in the small matrix code to compute GEMM called with A transpose case Change-Id: I29f40046d43d7a4b037c1cb322503ee26495f462 commit de16beb83b29b4b9748f70db985b0fe04db85f7d Author: Devin Matthews Date: Fri May 26 14:49:31 2017 -0400 PACKDIM_MR=8 didn't work out, but messing with the prefetching helps 2%. commit 25d0e618544b6eea7d3f13c7aec513ac0139801d Author: Devin Matthews Date: Fri May 26 14:47:36 2017 -0400 Revert "Change PACKDIM_MR (double) for haswell to 8." This reverts commit 681eec913d7c2ebcff637cec5c1627ced9a92b99. commit c5bdd84b35bc2a8ebf55b7763fb56c0c945be0cb Author: Devin Matthews Date: Fri May 26 12:28:09 2017 -0500 Change PACKDIM_MR (double) for haswell to 8. commit 172789d562001293b973bbdd8015bd27d37292e8 Author: Field G. Van Zee Date: Wed May 17 13:03:52 2017 -0500 Restored deleted lines from makefile fragments. commit 3ea9bd2c8e90dbd35655fa6a5b953dfea1f308fe Author: Devin Matthews Date: Wed May 17 12:29:44 2017 -0500 Change to /bin/sh. All scripts checked with Debian's checkbashisms. Also check for clang first in auto-detect.sh. commit 49438409eedb98d3f0ebf00b8d1eee0ae45f4f8c Author: Devin Matthews Date: Wed May 17 12:27:14 2017 -0500 Remove shebangs from makefiles. commit 497e2640474c016d576dce3530fa6a66891642a0 Author: J M Dieterich Date: Tue May 16 23:11:22 2017 -0400 Fix if/else structure. Thanks to TravisCI. commit 835035c56a8de36ad25bb8d1375db170d489ef57 Author: J M Dieterich Date: Tue May 16 22:23:27 2017 -0400 Mark piledriver compilable w/ clang. commit 6cdb533472ee61af297c1f948307abbf45828887 Author: J M Dieterich Date: Tue May 16 22:12:12 2017 -0400 Mark bulldozer compilable w/ clang. commit a85697d62272da06d28cd1c947f6cf1098df6467 Author: J M Dieterich Date: Tue May 16 22:06:59 2017 -0400 Correct error message. commit e0c64cad271058688a2b999caf8c2767dc3aef7e Author: J M Dieterich Date: Tue May 16 22:03:23 2017 -0400 Indeed once can compile for carrizo also using clang. commit 4aafe0505d3f0954d095ded5459a76976e5093b4 Author: J M Dieterich Date: Tue May 16 21:50:49 2017 -0400 A bunch of shebang fixes from unportable /bin/bash to portable /usr/bin/env bash commit abaeaa68ea11e84be1810f564d6f38d506cbeb6a Author: Field G. Van Zee Date: Fri May 5 15:06:56 2017 -0500 Fixed a bug in norm1v, norm1m. Details: - Fixed a bug that manifested as improperly-computed 1-norm for vectors and matrices. This is one of the few operations in BLIS that does not have its own test module within the testsuite, hence why it went undetected for so long. The bad 1-norms were being used to normalize matrices in the testsuite after initialization, which led to some matrices containing a combination of "large" and "small" values. This tended to push the residuals computed after each test away from zero. In some cases, they were off *just* enough to the testsuite to label it a "failure". Many thanks to Jeff Hammond for reporting this bug. (Wonky details: the bug was due to improperly-defined level-0 scalar macros for abval2, an operation that computes the absolute square, or complex magnitude/modulus. Certain complex domain instances of abval2 were being incorrectly defined in terms of real-only solutions, leading to bad results. This level-0 operation forms the basis of norm1v/norm1m. absq2 was also affected, but almost nothing uses this operation.) commit cc3107ae1c2074f72b724aa748d2e5b4cb290ed5 Author: Devin Matthews Date: Thu May 4 10:35:22 2017 -0500 Setting any one of BLIS_NT_[IJ][CR] overrides BLIS_NUM_THEADS. Missing BLIS_NT_XX's are defaulted to 1. Fixes #123. commit c8ab91f70d399ee14edd30a3a5c46b24c5d2f910 Author: Field G. Van Zee Date: Wed May 3 15:04:51 2017 -0500 Disable complex 3m/4m in testsuite by default. Details: - Disabled testsuite tests of all level-3 implementations based on 3m and 4m. This will improve testing runtime on Travis CI as well as for anyone manually running the testsuite using default test parameters. Thanks to Devin Matthews for suggesting this change. commit 9700f0e5785007ddafb72a5ca83800dee61fd35c Author: Jeff Hammond Date: Tue May 2 19:25:21 2017 -0700 allow KNL build without hbwmalloc.h (i.e. emulated) we want to be able to run BLIS KNL binaries on non-KNL machines via SDE. although it is possible to install hbwmalloc implementation on such systems, it is easier not to, since obviously the performance of SDE execution is not representative so there is no reason to emulate HBW allocation. commit 17dcd5a33ff91967f67e7c0ba09b4f18754609a4 Author: Field G. Van Zee Date: Tue May 2 16:48:43 2017 -0500 Fixed stray parentheses in README citations. commit 2910d44ff9e1d951d3249313f4ab39d18ea1b48d Author: Field G. Van Zee Date: Tue May 2 16:38:43 2017 -0500 CHANGELOG update (0.2.2) commit 5ca3863220e07972fcefc6682ddd3f6e54fe4a94 Author: Field G. Van Zee Date: Tue May 2 15:48:30 2017 -0500 Fixed a trsm1m bug that affected right-side cases. Details: - Fixed a bug introduced in 1c732d3 that affected trsm1m_r. The result was nondeterministic behavior (usually segmentation faults) for certain problem sizes beyond the 1m instance of kc (e.g. 128 on haswell). The cause of the bug was my commenting out lines in bli_gemm1m_ukr_ref.c which explicitly directed the virtual gemm micro-kernel to use temporary space if the storage preference of the [real domain] gemm ukernel did not match the storage of the output matrix C. In the context of gemm, this handling is not needed because agreement between the storage pref and the matrix is guaranteed by a high-level optimization in BLIS. However, this optimization is not applied to trsm because the storage of C is not necessarily the same as the storage of the micro-panels of B--both of which are updated by the micro-kernel during a trsm operation. Thus, the guarantee of storage/preference agreement is not in place for trsm, which means we must handle that case within the virtual gemm micro-kernel. - Comment updates and a minor macro change to bli_trsm*_cntx_init() for 3m1, 4m1a, and 1m. commit 1af0b09f5c275ee7bac896cc6f36f42af721d9b5 Author: Field G. Van Zee Date: Tue May 2 12:09:39 2017 -0500 README.md update. Details: - Updated bibtex entries for 4th BLIS paper, and adds entries for 5th and 6th BLIS papers. commit db4a0bb8ba7cd697d68be8e5632371ee3e59fd63 Author: Field G. Van Zee Date: Fri Mar 17 12:07:27 2017 -0500 Whitespace reformatting to armv8a kernels file. Details: - Updated formatting of function signature/header in kernels/armv8a/3/bli_gemm_opt_4x4.c. commit e3eb01f6b990e205b15edcbaffd3d54b3ddd1ca4 Author: Field G. Van Zee Date: Tue Feb 21 15:33:39 2017 -0600 Disabled experiment-related 1m code. Details: - Commented out code in frame/ind/oapi/bli_l3_3m4m1m_oapi.c that was specifically inserted to facilitate the benchmarking of 1m block-panel and panel-block algorithms. - Updates to test/3m4m/Makefile, runme.sh script, and test_gemm.c to reflect changes used/needed during benchmarking. commit 4f61528d56eed6a139eeac9db0c44e56f2d2d136 Author: Field G. Van Zee Date: Wed Jan 25 16:25:46 2017 -0600 Added 1m-specific APIs for bp, pb gemm algorithms. Details: - Defined bli_gemmbp_cntl_create(), bli_gemmpb_cntl_create(), with the body of bli_gemm_cntl_create() replaced with a call to the former. - Defined bli_cntl_free_w_thrinfo(), bli_cntl_free_wo_thrinfo(). Now, bli_cntl_free() can check if the thread parameter is NULL, and if so, call the latter, and otherwise call the former. - Defined bli_gemm1mbp_cntx_init(), bli_gemm1mpb_cntx_init(), both in terms of bli_gemm1mxx_cntx_init(), which behaves the same as bli_gemm1m_cntx_init() did before, except that an extra bool parameter (is_pb) is used to support both bp and pb algorithms (including to support the anti-preference field described below). - Added support for "anti-preference" in context. The anti_pref field, when true, will toggle the boolean return value of routines such as bli_cntx_l3_ukr_eff_prefers_storage_of(), which has the net effect of causing BLIS to transpose the operation to achieve disagreement (rather than agreement) between the storage of C and the micro-kernel output preference. This disagreement is needed for panel-block implementations, since they induce a transposition of the suboperation immediately before the macro-kernel is called, which changes the apparent storage of C. For now, anti-preference is used only with the pb algorithm for 1m (and not with any other non-1m implementation). - Defined new functions, bli_cntx_l3_ukr_eff_prefers_storage_of() bli_cntx_l3_ukr_eff_dislikes_storage_of() bli_cntx_l3_nat_ukr_eff_prefers_storage_of() bli_cntx_l3_nat_ukr_eff_dislikes_storage_of() which are identical to their non-"eff" (effectively) counterparts except that they take the anti-preference field of the context into account. - Explicitly initialize the anti-pref field to FALSE in bli_gks_cntx_set_l3_nat_ukr_prefs(). - Added bli_gemm_ker_var1.c, which implements a panel-block macro-kernel in terms of the existing block-panel macro-kernel _ker_var2(). This technique requires inducing transposes on all operands and swapping the A and B. - Changed bli_obj_induce_trans() macro so that pack-related fields are also changed to reflect the induced transposition. - Added a temporary hack to bli_l3_3m4m1m_oapi.c that allows us to easily specify the 1m algorithm (block-panel or panel-block). - Renamed the following cntx_t-related macros: bli_cntx_get_pack_schema_a() -> bli_cntx_get_pack_schema_a_block() bli_cntx_get_pack_schema_b() -> bli_cntx_get_pack_schema_b_panel() bli_cntx_get_pack_schema_c() -> bli_cntx_get_pack_schema_c_panel() and updated all instantiations. Also updated the field names in the cntx_t struct. - Comment updates. commit 1d728ccb2394e77365e7c42683db6579c5fba014 Author: Field G. Van Zee Date: Fri Nov 25 18:29:49 2016 -0600 Implemented the 1m method. Details: - Implemented the 1m method for inducing complex domain matrix multiplication. 1m support has been added to all level-3 operations, including trsm, and is now the default induced method when native complex domain gemm microkernels are omitted from the configuration. - Updated _cntx_init() operations to take a datatype parameter. This was needed for the corresponding function for 1m (because 1m requires us to choose between column-oriented or row-oriented execution, which requires us to query the context for the storage preference of the gemm microkernel, which requires knowing the datatype) but I decided that it made sense for consistency to add the parameter to all other cntx initialization functions as well, even though those functions don't use the parameter. - Updated bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs() to take a second scalar for each blocksize entry. The semantic meaning of the two scalars now is that the first will scale the default blocksize while the second will scale the maximum blocksize. This allows scaling the two independently, and was needed to support 1m, which requires scaling for a register blocksize but not the register storage blocksize (ie: "packdim") analogue. - Deprecated bli_blksz_reduce_dt_to() and defined two new functions, bli_blksz_reduce_def_to() and bli_blksz_reduce_max_to(), for reducing default and maximum blocksizes to some desired blocksize multiple. These functions are needed in the updated definitions of bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs(). - Added support for the 1e and 1r packing schemas to packm, including 1e/1r packing kernels. - Added a minor optimization to bli_gemm_ker_var2() that allows, under certain circumstances (specifically, real domain beta and row- or column-stored matrix C), the real domain macrokernel and microkernel to be called directly, rather than using the virtual microkernel via the complex domain macrokernel, which carries a slight additional amount of overhead. - Added 1m support to the testsuite. - Added 1m support to Makefile and runme.sh in test/3m4m. Also simplified some code in test_gemm.c driver. commit 0d1b90286e29aa8b768e280b5286d92c02ad87a1 Author: Jeff Hammond Date: Tue Oct 25 21:15:26 2016 -0700 never use libm with Intel compilers Intel compilers include a highly optimized math library (libimf) that should be used instead of GNU libm. yes, this change is for ALL targets, including those that are not supported by the Intel compiler. there is no harm in doing this, and it is future-proof in the event that the Intel compilers support other architectures. commit b150870397e7aee558e61d1bd72a0c0d1d99bee8 Author: Field G. Van Zee Date: Fri Dec 8 16:08:41 2017 -0600 Removed most "old" directories. Details: - Removed the vast majority of directories named "old", which contained deprecated code that I wasn't quite ready to jettison from the source tree. commit 270c65985df849297ba1951aa3b56c03948d7775 Author: Field G. Van Zee Date: Fri Dec 8 15:21:18 2017 -0600 Modified bli_getopt() for thread-safety. Details: - Changed the interface of bli_getopt() to take a new argument, a getopt_t struct, that stores the values of optarg, optind, opterr, and optopt, and updated the implementation accordingly. (Previously, these variables were assumed to be global.) - Added a function for initializing a getopt_t struct. - Changed test_libblis.c--currently the only consumer of bli_getopt()--to utilize the new getopt_t state object. commit ce4d8fabc2e39371f89c12192fb707be82ae021a Merge: 39be59f2 e05a8dfa Author: Field G. Van Zee Date: Thu Dec 7 17:36:44 2017 -0600 Merge branch 'master' of github.com:flame/blis commit 39be59f2a8470f40475907d9dd52639b8a911a92 Author: Field G. Van Zee Date: Thu Dec 7 17:35:20 2017 -0600 Replaced several macros with static function APIs. Details: - Reimplemented several sets of get/set-style preprocessor macros with static functions, including those in the following frame/base headers: auxinfo, cntl, mbool, mem, membrk, opid, and pool. A few headers in frame/thread were touched as well: mutex_*, thrcomm, and thrinfo. commit e05a8dfa7cc7df41e966c1ad04e51c482b308b23 Merge: 79507337 4423e33d Author: dnp Date: Wed Dec 6 16:45:24 2017 -0600 Merge branch 'rt' commit 4423e33dc593115cda92c5763d756d7ad1298aa9 Author: dnp Date: Wed Dec 6 16:35:03 2017 -0600 Adding SKX kernels and configuration. commit 79507337e140daec7639f6eb3ed9cfe6e123d342 Author: Field G. Van Zee Date: Wed Dec 6 16:21:35 2017 -0600 Various checks to ensure that arch_t id is in range. Details: - Expanded checking of the arch_t id in bli_gks.c--either passed in from the caller or as returned from bli_arch_query_id()--against the expected range of id values. Thanks to Devangi Parikh for suggesting these additional sanity checks. commit fde7c1126c58373ecde83471890b257399144876 Author: Field G. Van Zee Date: Mon Dec 4 16:11:01 2017 -0600 Added 'uninstall-old-headers' target to Makefile. Details: - Defined a new 'uninstall-old-headers' target that allows users of BLIS to uninstall no-longer-needed headers left over from previous installations. - Fixed the 'uninstall-old' target so that it will install both .a and .so libraries. - Renamed 'uninstall-old' to 'uninstall-old-libs'. - Added 'uninstall-old' target (different from previous 'uninstall-old' target) that combines 'uninstall-old-libs' and 'uninstall-old-headers'. commit d4ee770bde213a87aa6049245145318324dc6b51 Author: Field G. Van Zee Date: Mon Dec 4 14:53:43 2017 -0600 Create/install monolithic cblas.h. Details: - When CBLAS is enabled at configure-time, BLIS now creates a monolithic cblas.h using the same flatten-header.sh script that was recently introduced for creating monolithic blis.h header files. The top-level Makefile will also install this cblas.h file into the install prefix alongside blis.h when the 'install' target is invoked. The two header files are compatible with one another. Regardless whether the user's source #includes cblas.h, both blis.h and cblas.h, or just blis.h, the user will get the CBLAS function prototypes and enums, as expected. commit 52f9e6f1b6468785af8947317656445d4729fc8b Merge: ab57b979 21360dd8 Author: Field G. Van Zee Date: Fri Dec 1 12:28:09 2017 -0600 Merge branch 'rt' commit 21360dd8e2c7287100645e109acaabcc6ba1140c Author: Field G. Van Zee Date: Wed Nov 29 14:11:34 2017 -0600 Fixed cntx_t packm query when ker_id > _NUM_PACKM_KERS. Details: - Fixed a subtle bug in bli_cntx_get_[un]packm_ker_dt() in which the function fails to return NULL when passed a kernel id argument that is equal to or beyond BLIS_NUM_[UN]PACKM_KERS. Instead, the function was attempting to index into the cntx_t's packm kernel array, which resulted in undefined behvaior. Thanks to Devangi Parikh for finding this bug. commit 244a6f4e66e8ff091e995f8090ce779c1928aa8b Author: Field G. Van Zee Date: Tue Nov 28 17:48:48 2017 -0600 Fixed POSIX sed non-compliance in flatten-header.sh. Details: - Changed GNU usage of 'i' and 'a' sed commands used in flatten-header.sh to POSIX-compliant usage that will work on OS X's sed. commit 45078621676833e53a2878af8f89479c4f93b8ab Author: Field G. Van Zee Date: Tue Nov 28 15:16:22 2017 -0600 Generate/compile with/install monolithic blis.h. Details: - Rewrote monolithify-header.sh (and renamed to flatten-header.sh) so that headers are inserted recursively. This improves performance by a factor of 3-4x. - Modified configure to create an 'include/' directory in which make can create a monolithic header. - Modified the top-level Makefile so that a monolithic header is generated unconditionally prior to compilation (stored in include/) and so that the single header is installed instead of the 450 or so header files that reside throughout the framework source tree. - Added "include/*/*.h" to .gitignore file. - Removed some pnacl/emscripten leftovers that I intended to include in a1caeba (mostly in testsuite/Makefile). - Trivial comment changes to frame/include/bli_f2c.h. commit 1f30b1301bf6d6047ec29e57a5fde8eb1072a0ee Author: Field G. Van Zee Date: Sat Nov 25 16:54:26 2017 -0600 Added missing framework support for x86_64 family. Details: - Added support for the x86_64 configuration family to bli_arch.c and bli_arch_config.h. Thanks to Johannes Dieterich for reporting this issue. - Bumped the default value for BLIS_SIMD_NUM_REGISTERS from 16 to 32 and the default value for BLIS_SIMD_SIZE from 32 to 64. This will support configuration families that include Skylake and newer processors without any supported needed in the bli_family_*.h file. The semantics of these values have always been "maximum" and not exact values; comments in bli_kernel_macro_defs.h and the github wiki have been adjusted accordingly. commit 9f39806c4ed484c9ed13edf96005838d977722a9 Author: Field G. Van Zee Date: Tue Nov 21 16:03:56 2017 -0600 Fixed a bug in e31f0b3/b131b9a. Details: - Erroneously placed the "don't overwrite existing blocksize" logic in bli_blksz_init*() rather than in bli_cntx_set_blkszs(). It belongs in the latter because that function copies blocksizes as-is from the blksz_t function argument to the appropriate field in the cntx_t. If the blksz_t was previously initialized selectively, based on the sign of the blocksize value passed into bli_blksz_init*(), that just leaves some fields possibly uninitialized (with garbage values), which definitely will not work. - The aforementioned logic has been moved to bli_cntx_set_blkszs() via a new function bli_blksz_copy_if_pos(), which selectively copies only the blocksizes that are greater than zero. commit b131b9a025c15f548d4c2952a9ec85eee3d139b1 Author: Field G. Van Zee Date: Tue Nov 21 14:30:26 2017 -0600 Updated configs to omit setting some blocksizes. Details: - Employ the new semantics of bli_blksz_init*() in e31f0b3 in various sub-configurations' bli_cntx_init_*() functions by passing in 0 for register and cache blocksizes that correpond to gemm microkernel datatypes that were not registered, allowing the default values set by the bli_cntx_init_*_ref() function call to remain. commit 499a4c002f895744ecaf81ef7f62d2d6d0d7d594 Merge: e31f0b3e 6c3ba502 Author: Field G. Van Zee Date: Tue Nov 21 14:25:08 2017 -0600 Merge branch 'rt' of github.com:flame/blis into rt commit e31f0b3e2dba19ca8a2946bc21beb136a42d0f57 Author: Field G. Van Zee Date: Tue Nov 21 14:21:25 2017 -0600 Subtle update to bli_blksz_init*() API. Details: - Updated the semantics of bli_blksz_init() and bli_blksz_init_ed() so that non-positive blocksize values are ignored entirely. This provides an easy way to indicate that certain existing values should not be touched by the update. Thanks to Devangi Parikh for feedback that led to these changes. commit 6c3ba502a11f87bc67555d26154cfd39d0af1bac Author: Field G. Van Zee Date: Tue Nov 21 13:50:53 2017 -0600 Added 'x86_64' sub-config directory. Details: - Added missing x86_64 configuration directory, which was intended to be part of b7ca580. - Added -Wfatal-errors compiler warning flag to all configurations so that compilation stops after the first error. - Changed the vectorization flags for intel64 configuration to be compatible with 'penryn', the oldest sub-config included in that family. - Changed the vectorization flags for penryn to target the 'core2' microarchitecture and ssse3. commit 25eee3cc49b0631812485d4d5ceef0c23ed1b6dd Author: Field G. Van Zee Date: Tue Nov 21 12:34:20 2017 -0600 Added a dummy file to kernels/generic. Details: - Added a dummy file to kernels/generic, which was previously empty, so that git would begin tracking the otherwise-empty directory. This directory's existence is necessary for proper execution of configure for any configuration family that contains the 'generic' sub-configuration. Thanks to Johannes Dieterich for reporting the issue that led to this fix. commit ef024ce4cafa217669eaabb31ff8ab6df93cca05 Author: Field G. Van Zee Date: Mon Nov 20 18:08:29 2017 -0600 More tweaks to monolithify-header.sh Details: - Further fixes monolithify-header.sh script. - Removed unnecessary #include "blis.h" from frame/3/bli_l3_packm.h. commit 5028e7dec269b62895511453272585da36e591b5 Author: Field G. Van Zee Date: Mon Nov 20 17:00:37 2017 -0600 Second attempt to implement travis_wait. Details: - Corrected accidental misplacement of the travis_wait prefix (on the wrong line of the .travis.yml file) in commit 13e5d91. commit 13e5d9107b3763cba46fb1bae87476852601b47c Author: Field G. Van Zee Date: Mon Nov 20 15:57:06 2017 -0600 Added travis_wait prefix to testsuite via Travis. Details: - It appears that Travis CL has implemented a new policy that results in a test failing if it does not produce any output for more than 10 minutes. (Two test instances are now failing in Travis despite the most recent commit not affecting the library or testsuite.) This issue can be worked around by executing the test run via travis_wait, which takes an optional time parameter. This commit attempts to use 'travis_wait 30' in the .travis.yml file to prevent the early failure at 10 minutes. commit a1caeba0ea79c8fecb1abadca1f91c6367ab3afb Author: Field G. Van Zee Date: Mon Nov 20 13:31:20 2017 -0600 Removed pnacl, emscripten support from Makefile. commit 78199c539beaa50f37893add220261ce0dcb921a Merge: b3d8ab2e ab57b979 Author: praveeng Date: Mon Nov 20 15:51:20 2017 +0530 Merge master code till 01-Nov-2017 to amd-staging Change-Id: I40b53f876db84c8b947b3f2385c9b882245c6603 commit 9df6dda9ec51a0d40166169d2d8a2f84b42266e6 Author: Field G. Van Zee Date: Sat Nov 18 19:03:26 2017 -0600 Improvements, bugfixes to monolithify-header.sh. commit 21d26201f90b884eb8d5de279ed74bbd244ffcb5 Merge: 43baa3b3 b7ca5806 Author: Field G. Van Zee Date: Sat Nov 18 14:16:53 2017 -0600 Merge branch 'rt' of github.com:flame/blis into rt commit 43baa3b327d5ae1e2ba619432687b4dd849b05e3 Author: Field G. Van Zee Date: Sat Nov 18 14:14:44 2017 -0600 Removed unnecessary flags for generic config. Details: - Removed -D_POSIX_C_SOURCE=200112L and -m64 flags from make_defs.mk file of generic sub-configuration. These flags are generally not necessary, and particularly not desirable for the generic configuration since they unnecessarily restrict the environments in which the configuration can be built. commit b7ca580618f9382b7982168fd035ed058f83e4c2 Author: iotamudelta Date: Sat Nov 18 14:56:05 2017 -0500 [WIP] Add x86 and x86_64 processor families. (#154) * Add x86 and x86_64 processor families. * Use generic config as fallback for more families. After discussion with fgvanzee, a) it's "generic" and 2) use it for all the families as a fallback. Goal is that if a specific CPU is not yet supported by a family (say a new Intel microarchitecture on x86_64), it'll fall through to still work with the slower "generic" kernels commit 870597d1663aaba1b74d7654b1d4946280aa0d3f Author: Field G. Van Zee Date: Fri Nov 17 17:06:42 2017 -0600 Added bash script for creating monolithic headers. Details: - Added a new script, monolithify-header.sh, to the 'build' directory. This script recursively replaces all #include directives in a selected file with the contents of the header files referenced by each directive. The idea is to "flatten" a tree of .h files into a single file, with the script acting as a C preprocessor that only processes #include directives. commit c76f77f4cc1e71988251c5e63cf6ef137477bf9c Author: Field G. Van Zee Date: Fri Nov 17 15:10:52 2017 -0600 Removed unnecessary #include "blis.h" from header. Details: - Removed an errant #include "blis.h directive from bli_cntx_ind_stage.h. The generaly policy is that no header file in BLIS should include blis.h. This will be important in the near future when using a tool to recursively create a monolithic blis.h file from its consitutent headers. commit 2bb9bc6e9536fa239fbc19a7efaaf151116e15b4 Author: Field G. Van Zee Date: Fri Nov 17 13:50:14 2017 -0600 Miscellaneous tweaks to gks, rt functionality. Details: - Updated bli_cpuid_query_id() so that BLIS_ARCH_GENERIC is always returned if the hardware fails to test positive for any supported sub-configuration. - Defined bli_gks_init_ref_cntx(), which will call the context initialization function bli_cntx_init_configname() for the sub-configuration 'configname' associated with the arch_t id returned by bli_arch_query_id(). This makes initializing a reference context easy for experts who wish to construct those contexts. commit b3d8ab2ea02c127ab241532abc214624f35bfaab Merge: 189ffbb0 fe71c06e Author: Santanu Thangaraj Date: Wed Nov 15 01:33:12 2017 -0500 Merge "Added AMD copyright line to the changed files in last 3 commits" into amd-staging commit fe71c06e42b072407c83112779055b0afb67173d Author: Nisanth M P Date: Wed Nov 15 11:11:17 2017 +0530 Added AMD copyright line to the changed files in last 3 commits Change-Id: I37d5dbbbe1b199e07529610a5e9cc9e49d067c66 commit d5bf79e50bf97072bbe7117c86b7c45e6e707ea0 Author: Field G. Van Zee Date: Mon Nov 13 14:24:29 2017 -0600 Miscellaneous tweaks and fixes. Details: - Fixed incorrect calling sequence in bli_cntx_init_knl.c--an instance of bli_blksz_init_easy() that should have been bli_blksz_init(). - Fixed a bug in code that is supposed to output the list of sub-directories in the 'config' directory when configure script is run with no arguments. - Expanded the output of "make showconfig" to include more info from config.mk. - Minor changes to build/auto-detect/cpuid_x86.c, mostly in preparation for someone to add excavator and zen support. - Added a link to the ConfigurationHowTo wiki to config_registry. - Other minor tweaks to configure. commit 673e5184030532c4ebd9fdeecbaa6442bb3ad54f Merge: 2c51356a 8f150f28 Author: Field G. Van Zee Date: Wed Nov 1 17:37:42 2017 -0500 Merge branch 'rt' of github.com:flame/blis into rt commit 2c51356a8b2699c99f9507c80d69c08a35d45fe3 Author: Field G. Van Zee Date: Wed Nov 1 17:37:02 2017 -0500 Implemented runtime hardware detection via cpuid. Details: - Added runtime support for selecting an appropriate arch_t value based on the results of the cpuid instruction (for x86_64). This allows deferral of choosing a context (kernels, blocksizes, etc.) until runtime, which allows BLIS to be built with support for multiple microarchitectures. Currently, only amd64 and intel64 configurations are registered in the config_registry; however, one could create custom configuration families to support arbitrary sets of x86_64 microarchitectures. - Current Intel microarchitectures supported via cpuid are knl, haswell, sandybridge, and penryn. - Current AMD microarchitectures supported via cpuid are: zen, excavator, steamroller, piledriver, and bulldozer. commit ab57b979046479bcda7f83165838a80117c2ad95 Author: Field G. Van Zee Date: Wed Nov 1 11:51:41 2017 -0500 Revert to default SIMD alignment for bulldozer. Details: - Removed the default-overriding #define of BLIS_SIMD_ALIGN_SIZE set in config/bulldozer/bli_kernel.h. Not sure where this value came from, but it would seem to allow for insufficient starting address alignment for any matrices created via bli_malloc_user(), such as via bli_obj_create(). Thanks to Rene Sitt for reporting the behavior that led us to this bug. - This commit is a manual patch of the same fix made to the 'rt' branch in 8f150f2. commit 8f150f28a678c4a0c1591400177ad7cca81fcaec Author: Field G. Van Zee Date: Wed Nov 1 11:41:45 2017 -0500 Revert to default SIMD alignment for bulldozer. Details: - Removed the default-overriding #define of BLIS_SIMD_ALIGN_SIZE set in bli_family_bulldozer.h. Not sure where this value came from, but it would seem to allow for insufficient starting address alignment for any matrices created via bli_malloc_user(), such as via bli_obj_create(). Thanks to Rene Sitt for reporting the behavior that led us to this bug. commit e3f10557caf114441fbfff990e3ce3576c177bdc Author: Field G. Van Zee Date: Mon Oct 30 13:37:54 2017 -0500 Use perl for some substitution for OS X compatibility. Details: - Discovered that sed commands where the replacement string contains '\n' are problematic with the version of sed present in OS X. For these cases cases in the configure script, we instead use 'perl -pe' for search-and-replace functionality. - Various other minor comment/whitespace tweaks to configure. - Removed remaining lines of code related to setting/checking variables to track "unregistered" configurations. commit dd45cfdfc3d8f9acf4cf7f69138d9b83dafc8842 Merge: 3e4f42a4 f60c827b Author: Field G. Van Zee Date: Mon Oct 30 12:23:05 2017 -0500 Merge branch 'master' into rt commit f60c827ba95f452c8454fb914f5564f4895bf644 Author: Devin Matthews Date: Mon Oct 30 10:04:42 2017 -0500 Fix CVECFLAGS for bulldozer config. commit 3e4f42a4d2ebb37b95988933d92e561c5b2cc201 Author: Field G. Van Zee Date: Fri Oct 27 11:41:37 2017 -0500 Typecast l1mkr_t enum value prior to comparison. Details: - Typecast l1mkr_t enum value in bli_cntx.h to guint_t before testing for out-of-range value. This is an attempt to pacify a strange warning from clang on OS X that is seemingly the result of the following compiler warning flag: -Wtautological-constant-out-of-range-compare commit aec6e038d942d35b81bbd723a640cce2c054fb8e Author: Field G. Van Zee Date: Thu Oct 26 16:12:36 2017 -0500 Removed associative arrays from configure. Details: - Implemented a replacement for associative arrays in the configure script that does not utilize arrays, and therefore works in pre-4.0 versions of bash. (It appears that Mac OS X will be stuck with version 3.2 indefinitely due to bash switching to the GPL 3.0 license starting with version 4.0.) commit 189ffbb0d37262b21acddc0d35b4a22f2cbbca94 Merge: 06e0e635 3eb44f67 Author: Santanu Thangaraj Date: Wed Oct 25 02:00:30 2017 -0400 Merge changes Ie115b206,I7ce6cfa2,Iff59b6f4 into amd-staging * changes: Adding __attribute__((constructor/destructor)) for CLANG case. Thread Safety: Move bli_init() before and bli_finalize() after main() Thread safety: Make the global induced method status array local to thread commit 3eb44f67618b91ae5f5f0aaaba67e38f16042ee4 Author: Nisanth M P Date: Tue Oct 24 16:36:36 2017 +0530 Adding __attribute__((constructor/destructor)) for CLANG case. CLANG supports __attribute__, but its documentation doesn't mention support for constructor/destructor. Compiling with clang and testing shows that it does support this. Change-Id: Ie115b20634c26bda475cc09c20960d687fb7050b commit 07c352188bf5265af242255f8e6fcb97050d973d Author: Field G. Van Zee Date: Mon Oct 23 16:59:22 2017 -0500 Added "generic" configuration. Details: - Added a "generic" configuration that leaves the default blocksizes and kernels unchanged. This replaces the older "reference" configuration. Updated auto-detect script and code accordingly. - Added support for generic configuration to arch_t (bli_type_defs.h), bli_gks_init() (bli_gks.c), and bli_arch_config.h - Moved bli_arch_query_id() to bli_arch.c (and prototype to bli_arch.h). - Whitespace changes to configurations' make_defs.mk files. commit c1a98d6f70608b02a1e6bcad6ba020a60773dace Author: Field G. Van Zee Date: Mon Oct 23 14:24:41 2017 -0500 Minor update to .travis.yml file. commit 75b9383f01caa8b83f8be0117e15085b0d807ba6 Author: Field G. Van Zee Date: Fri Oct 20 16:41:22 2017 -0500 Minor header renaming ahead of bli_arch.c. Details: - Renamed the various configurations' "bli_arch_.h" header files (replacing "arch" with "family") to free up the 'bli_arch' namespace for a different purpose (hardware detection). - Renamed "bli_arch.h" and "bli_arch_pre_macro_defs.h" in frame/include to "bli_arch_config.h" and "bli_arch_config_pre.h", respectively. commit 482af51add26d5ed103c3e3f167657f273b32c7a Author: Field G. Van Zee Date: Fri Oct 20 15:44:26 2017 -0500 Fixed 'make test' target from top-level Makefile. Details: - Updated the top-level Makefile's build rule for testsuite object files to properly obtain CFLAGS via get-frame-cflags-for() function instead of simply using the $(CFLAGS) variable (which is empty). This means that 'make test' should now work as expected. commit 3c269f700d207efe6c04193f09d519c88c1d4045 Author: Field G. Van Zee Date: Fri Oct 20 13:57:21 2017 -0500 Makefile updates for test drivers, testsuite. Details: - Fixed semi-broken testsuite Makefile and very-broken test driver Makefiles, as well as those for test/3m4m, test/thread_ranges, and test/exec_sizes sub-directories. - Factored out much of the top-level Makefile into common.mk. A Makefile needs only set DIST_PATH to the relative path to the top level of the BLIS source distribution before including common.mk in order to acquire all of the definitions typically needed in a Makefile that tests BLIS. commit 0557189d463446b4c32077cdcf0467fa71ca68dc Author: Field G. Van Zee Date: Wed Oct 18 15:05:27 2017 -0500 Minor updates to .travis.yml, configure script. commit 2553734d1d62043793f4e783a027349ef6d4d563 Merge: 453deb29 37534279 Author: Field G. Van Zee Date: Wed Oct 18 13:46:50 2017 -0500 Merge branch 'master' into rt commit 375342799cbae981c28d831793af588d7951f3f6 Author: Field G. Van Zee Date: Wed Oct 18 13:41:25 2017 -0500 Removed a duplicate bli_avx512_macros.h header. Details: - Removed a duplicate header file that was causing problems during installation for the 'knl' configuration. Thanks to Victor Eijkhout for reporting this issue. commit 453deb29068889698e274f269c9aa90eea99b527 Author: Field G. Van Zee Date: Wed Oct 18 13:29:32 2017 -0500 Implemented runtime kernel management. Details: - Reworked the build system around a configuration registry file, named config_registry', that identifies valid configuration targets, their constituent sub-configurations, and the kernel sets that are needed by those sub-configurations. The build system now facilitates the building of a single library that can contains kernels and cache/register blocksizes for multiple configurations (microarchitectures). Reference kernels are also built on a per-configuration basis. - Updated the Makefile to use new variables set by configure via the config.mk.in template, such as CONFIG_LIST, KERNEL_LIST, and KCONFIG_MAP, in determining which sub-configurations (CONFIG_LIST) and kernel sets (KERNEL_LIST) are included in the library, and which make_defs.mk files' CFLAGS (KCONFIG_MAP) are used when compiling kernels. - Reorganized 'kernels' directory into a "flat" structure. Renamed kernel functions into a standard format that includes the kernel set name (e.g. 'haswell'). Created a "bli_kernels_.h" file in each kernels sub-directory. These files exist to provide prototypes for the kernels present in those directories. - Reorganized reference kernels into a top-level 'ref_kernels' directory. This directory includes a new source file, bli_cntx_ref.c (compiled on a per-configuration basis), that defines the code needed to initialize a reference context and a context for induced methods for the microarchitecture in question. - Rewrote make_defs.mk files in each configuration so that the compiler variables (e.g. CFLAGS) are "stored" (renamed) on a per-configuration basis. - Modified bli_config.h.in template so that bli_config.h is generated with #defines for the config (family) name, the sub-configurations that are associated with the family, and the kernel sets needed by those sub-configurations. - Deprecated all kernel-related information in bli_kernel.h and transferred what remains to new header files named "bli_arch_.h", which are conditionally #included from a new header bli_arch.h. These files are still needed to set library-wide parameters such as custom malloc()/free() functions or SIMD alignment values. - Added bli_cntx_init_.c files to each configuration directory. The files contain a function, named the same as the file, that initializes a "native" context for a particular configuration (microarchitecture). The idea is that optimized kernels, if available, will be initialized into these contexts. Other fields will retain pointers to reference functions, which will be compiled on a per-configuration basis. These bli_cntx_init_*() functions will be called during the initialization of the global kernel structure. They are thought of as initializing for "native" execution, but they also form the basis for contexts that use induced methods. These functions are prototyped, along with their _ref() and _ind() brethren, by prototype-generating macros in bli_arch.h. - Added a new typedef enum in bli_type_defs.h to define an arch_t, which identifies the various sub-configurations. - Redesigned the global kernel structure (gks) around a 2D array of cntx_t structures (pointers to cntx_t, actually). The first dimension is indexed over arch_t and the inner dimension is the ind_t (induced method) for each microarchitecture. When a microarchitecture (configuration) is "registered" at init-time, the inner array for that configuration in the 2D array is initialized (and allocated, if it hasn't been already). The cntx_t slot for BLIS_NAT is initialized immediately and those for other induced method types are initialized and cached on-demand, as needed. At cntx_t registration, we also store function pointers to cntx_init functions that will initialize (a) "reference" contexts and (b) contexts for use with induced methods. We don't cache the full contexts for reference contexts since they are rarely needed. The functions that initialize these two kinds of contexts are generated automatically for each targeted sub-configuration from cpp-templatized code at compile-time. Induced method contexts that need "stage" adjustments can still obtain them via functions in bli_cntx_ind_stage.c. - Added new functions and functionality to bli_cntx.c, such as for setting the level-1f, level-1v, and packm kernels, and for converting a native context into one for executing an induced method. - Moved the checking of register/cache blocksize consistency from being cpp macros in bli_kernel_macro_defs.h to being runtime checks defined in bli_check.c and called from bli_gks_register_cntx() at the time that the global kernel structure's internal context is initialized for a given microarchitecture/configuration. - Deprecated all of the old per-operation bli_*_cntx.c files and removed the previous operation-level cntx_t_init()/_finalize() invocations. Instead, we now query the gks for a suitable context, usually via bli_gks_query_cntx(). - Deprecated support for the 3m2 and 3m3 induced methods. (They required hackery that I was no longer willing to support.) - Consolidated the 1e and 1r packm kernels for any given register blocksize into a single kernel that will branch on the schema and support packing to both formats. - Added the cntx_t* argument to all packm kernel signatures. - Deprecated the local function pointer array in all bli_packm_cxk*.c files and instead obtain the packm kernel from the cntx_t. - Added bli_calloc_intl(), which serves as the calloc-equivalent to to bli_malloc_intl(). Useful when we wish to allocate and initialize to zero/NULL. - Converted existing cpp macro functions defined in bli_blksz.h, bli_func.h, bli_cntx.h into static functions. commit 4607aac297e55ad540cbe5fffbe02e6b1889c181 Author: Nisanth M P Date: Mon Oct 16 22:06:57 2017 +0530 Thread Safety: Move bli_init() before and bli_finalize() after main() BLIS provides APIs to initialize and finalize its global context. One application thread can finalize BLIS, while other threads in the application are stil using BLIS. This issue can be solved by removing bli_finalize() from API. One way to do this is by getting bli_finalize() to execute by default after application exits from main(). GCC supports this behaviour with the help of __attribute__((destructor)) added to the function that need to be executed after main exits. Similarly bli_init() can be made to run before application enters main() so that application need not call it. Change-Id: I7ce6cfa28b384e92c0bdf772f3baea373fd9feac commit 0f5ce26fc597cda6e8ae93a7526f52eb8cba01e9 Author: Nisanth M P Date: Mon Oct 16 21:07:50 2017 +0530 Thread safety: Make the global induced method status array local to thread BLIS retains a global status array for induced methods, and provides APIs to modify this state during runtime. So, one application thread can modify the state, before another starts the corresponding BLIS operation. This patch solves this issue by making the induced method status array local to threads. Change-Id: Iff59b6f473771344054c010b4eda51b7aa4317fe commit b882648af87deb1b365fc6b3e94151e69c5ccfa4 Merge: 8b379069 e02d3cb8 Author: Field G. Van Zee Date: Wed Oct 11 16:32:21 2017 -0500 Merge branch 'master' into rt commit 06e0e6351acb9481225975ad9a4e0b8925336621 Author: sthangar Date: Thu Sep 28 12:15:36 2017 +0530 The inner loop paralleization is turned off by default, the JR and IR loop parameters are set to 1 by default Change-Id: I8c3c2ecbbd636259f6ffb92768ec04148205c3e5 commit e02d3cb84190a345ebe9b32f53db03a1838976b1 Author: Field G. Van Zee Date: Tue Sep 26 19:02:53 2017 -0500 Fixed a pthread typo in previous commit. Details: - Misnamed 'pthread_mutex_t' type in bli_memsys.c as 'thread_mutex_t'. commit f5962a1aae0fb3c9be104d0035c0d73210e7f670 Author: Field G. Van Zee Date: Tue Sep 26 17:00:04 2017 -0500 Fixed bugs in gemm/gemmtrsm ukr tests in testsuite. Details: - Fixed a bug in gemmtrsm test module that was due to improper partitioning into a k x k triangular matrix for the purposes of obtaining an mr x k micropanel of A with which to test. - Fixed a bug in gemm and gemmtrsm test modules that would only manifest for very large k (depending on the product of mr x kc on that architecture). The bug arose from the fact that the test module was triggering the allocation of blocks from the internal memory pools, which are limited in size. This allocation imposes an implicit assumption that the micro- panel being tested with will fit inside, and this assumption is violated for large values of k. Arbitrarily large k may now be tested for both operation tests. - Added OpenMP/pthread critical sections around the setting or getting of statuses from the induced method operation lookup table in bli_l3_ind.c. - Added the 'static' keyword to all pthread_mutex_t global variables in BLIS. - Thanks to Nisanth Padinharepatt of AMD for reporting the first and third issues. commit 8e917b256ca2d4bcdc059fe98d86be8775c69561 Author: Field G. Van Zee Date: Sat Sep 9 14:10:15 2017 -0500 Updated bibtex info for BLIS5 (3m4m) article. commit 7be887057358df4978a4833eeae0c17e15acd9d1 Author: Nisanth M P Date: Mon Aug 28 17:38:22 2017 +0530 Merging "Adding auto hardware detection for Zen" Change-Id: Id450fb0c4f91a5cd5cbdc06970f4f9ed28dd8520 commit e056d810d16621891ead032603de0c2105cfc0f7 Author: sthangar Date: Mon Aug 28 16:44:42 2017 +0530 Bug fix for the testsuite build failing Change-Id: I7cd8c9d187387c48b2564e45cbfb8df985e93d77 commit 83796b7caf745fafc263e9e5e1bfcf5eff00c025 Merge: 8176f4e4 d1ee7762 Author: Kiran Varaganti Date: Mon Aug 28 05:23:28 2017 -0400 Merge "Adding auto hardware detection for Zen" into amd-staging commit d1ee776202b26874333af7a91b6d2686342c4c81 Author: sthangar Date: Wed Aug 23 13:01:14 2017 +0530 Adding auto hardware detection for Zen Change-Id: I40ce6705dd66b35000c4ccddffad1c5b65998caf commit 8176f4e43872714b997f1a5f83056daadb0ff1a5 Merge: 12413018 adafe974 Author: praveeng Date: Mon Aug 28 12:21:16 2017 +0530 resolving conflicts bli_gemm_front.c and LICENCE Change-Id: Id24ce53896d4c1c7ceccc3e004014a0ecceb5474 commit 57e1e5cd51e7ffe8612c96a20b6a041b55426ddb Merge: f86ce54d d6ef56c6 Author: Nisanth M P Date: Tue Aug 22 17:07:44 2017 +0530 Merge AMD authored changes commit adafe974b4bc3fc0663bc2f6f4ce2fde71a97988 Merge: f86ce54d 7dc78b49 Author: Devin Matthews Date: Tue Aug 15 15:17:21 2017 -0500 Merge pull request #150 from devinamatthews/vzeroupper Add vzeroupper to Intel AVX kernels. commit 7dc78b49f97e6b3cd6d72fcdc588ace534d0e700 Author: Devin Matthews Date: Tue Aug 15 10:02:25 2017 -0500 Add vzeroupper to Intel AVX kernels. commit f86ce54d6f315006984534fe29e47a2deaacc9f5 Author: Field G. Van Zee Date: Thu Aug 10 16:24:28 2017 -0500 Removed trailing enum commas from bli_type_defs.h. Details: - Removed trailing commas from enums in bli_type_defs.h. Thanks to Erling Andersen for pointing out this inconsistency and suggesting the change. commit 60a1eeb2317939d732b9eb6ff1e0d6d668c9a1e5 Author: Field G. Van Zee Date: Sat Aug 5 13:04:31 2017 -0500 Added edge handling to _determine_blocksize_b(). Details: - Added explicit handling of situations where i == dim to bli_determine_blocksize_b_sub(). This isn't actually needed by any current use case within BLIS, but handling the situation is nonetheless prudent. Thanks to Minh Quan for reporting this issue and requesting the fix. commit b01c80829907d50ec79977fba8e7b53cfe7db80a Author: Field G. Van Zee Date: Fri Aug 4 14:17:44 2017 -0500 Fixed a minor bug in level-3 packm management. Details: - Fixed a bug in bli_l3_packm() that caused cntl_t-cached packed mem_t entries to be released and then re-acquired unnecessarily. (In essence, the "<" operands in the conditional that guards the release-and-reacquire code block simply needed to be swapped.) The bug should have only affected performance (rather than the computed result). Thanks to Minh Quan for identifying and reporting the bug. commit 8b379069fcd4811669855b1248ece831f190dff6 Merge: 1f3a5819 05925dd5 Author: Field G. Van Zee Date: Tue Aug 1 15:30:40 2017 -0500 Merge branch 'master' into rt commit 05925dd5d30e8f403bb671ce33029170d65ce7c0 Merge: 803bbef0 cecdc05d Author: Devin Matthews Date: Tue Aug 1 09:31:02 2017 -0500 Merge pull request #146 from devinamatthews/master Change lsame_ signature to match lapacke. commit cecdc05d2834786a84ff85775d3f99a958c0765a Author: Devin Matthews Date: Mon Jul 31 15:19:51 2017 -0500 Change lsame_ signature to match lapacke. commit 803bbef0a386dd0571ad389f69d55154dbfe3c50 Author: Field G. Van Zee Date: Sat Jul 29 20:17:05 2017 -0500 Fixed pthreads compile bug with previous commit. Details: - Erroneously passed family parameter into l3int_t function despite that function not taking the parameter. Oops. commit c63980f4ca750618f359031d0691289b1abf5146 Author: Field G. Van Zee Date: Sat Jul 29 14:53:39 2017 -0500 Moved 'family' field from cntx_t to cntl_t. Details: - Removed the family field inside the cntx_t struct and re-added it to the cntl_t struct. Updated all accessor functions/macros accordingly, as well as all consumers and intermediaries of the family parameter (such as bli_l3_thread_decorator(), bli_l3_direct(), and bli_l3_prune_*()). This change was motivated by the desire to keep the context limited, as much as possible, to information about the computing environment. (The family field, by contrast, is a descriptor about the operation being executed.) - Added additional functions to bli_blksz_*() API. - Added additional functions to bli_cntx_*() API. - Minor updates to bli_func.c, bli_mbool.c. - Removed 'obj' from bli_blksz_*() API names. - Removed 'obj' from bli_cntx_*() API names. - Removed 'obj' from bli_cntl_*(), bli_*_cntl_*() API names. Renamed routines that operate only on a single struct to contain the "_node" suffix to differentiate with those routines that operate on the entire tree. - Added enums for packm and unpackm kernels to bli_type_defs.h. - Removed BLIS_1F and BLIS_VF from bszid_t definition in bli_type_defs.h. They weren't being used and probably never will be. commit 07837395560d413a1ba828163b41186e21a7bcfe Merge: ca1d1d85 ad8610b4 Author: Field G. Van Zee Date: Fri Jul 21 16:49:48 2017 -0500 Merge pull request #139 from Maratyszcza/emscripten Fix Emscripten builds commit ad8610b4415cc7982804d74f9aba29875e9e2b6c Merge: 8772a0b3 ca1d1d85 Author: Field G. Van Zee Date: Fri Jul 21 15:18:33 2017 -0500 Merge branch 'master' into emscripten commit ca1d1d8560c9ab1a7e3b0ac43ac70d08075bf904 Merge: b537b5bb 733faf84 Author: Devin Matthews Date: Fri Jul 21 09:49:50 2017 -0500 Merge pull request #144 from devinamatthews/fix_atomics_on_bgq Add fallbacks to __sync_* or __c11_atomic_* builtins... commit 733faf848dcc54834fcdfbb0185dc644978d8864 Author: Devin Matthews Date: Thu Jul 20 14:50:13 2017 -0500 Clang can't make up it's mind what to support. commit 7425d0744d9e9cd29a887120e57c2b43ba287040 Author: Devin Matthews Date: Thu Jul 20 12:54:58 2017 -0500 Add default #define for __has_extension. commit b537b5bbe8cbee459a85bac11458498ae2bce4de Merge: 1f1ec0db 7f41bb0a Author: Devin Matthews Date: Thu Jul 20 10:58:39 2017 -0500 Merge pull request #133 from devinamatthews/haswell-packdim Fix prefetching in haswell ukernel commit 8823f91a14638ce6f4e45e67df03212bb61609d6 Author: Devin Matthews Date: Thu Jul 20 10:04:34 2017 -0500 Add fallbacks to __sync_* or __c11_atomic_* builtins when __atomic_* is not supported. Fixes #143. commit 1f1ec0db9380b87679d5c771c4594daa1cfc5f0d Author: Field G. Van Zee Date: Wed Jul 19 15:40:48 2017 -0500 Updated ar option list used by all configurations. Details: - Dropped 'u' from the list of modifiers passed into the library archiver ar. Previously, "cru" was used, while now we employ only "cr". This change was prompted by a warning observed on Ubuntu 16.04: ar: `u' modifier ignored since `D' is the default (see `U') This caused me to realize that the default mode causes timestamps to be zero, and thus the 'u' option, which causes only changed object files to be inserted, is not applicable. commit 5caaba2d61cbbc36d63102a0786ece28ff797f72 Author: Field G. Van Zee Date: Wed Jul 19 13:51:53 2017 -0500 Added --force-version=STRING option to configure. Details: - Added an option to configure that allows the user to force an arbitrary version string at configure-time. The help text also now describes the usage information. - Changed the way the version string is communicated to the Makefile. Previously, it was read into the VERSION variable from the 'version' file via $(shell cat ...). Now, the VERSION variable is instead set in config.mk (via a configure-substituted anchor from config.mk.in). commit 13175c5fb70fb6a378d5fff6ecede62e5ea6a1f6 Author: Field G. Van Zee Date: Tue Jul 18 17:56:00 2017 -0500 Updated openmp/pthread barriers with GNU atomics. Details: - Updated the non-tree openmp and pthreads barriers defined in bli_thrcomm_openmp.c and bli_thrcomm_pthreads.c to instead call a common implementation in bli_thrcomm.c, bli_thrcomm_barrier_atomic(). This new implementation goes through the same motions as the previous codes, but protects its loads and increments with GNU atomic built-ins. These atomic statements take memory ordering parameters that allow us to specify just enough constraints for the barrier to work as intended on weakly-ordered hardware. The prior implementation was only guaranteed to work on systems with strongly- ordered memory. (Thanks to Devin Matthews for suggesting this change and his crash-course in atomics and memory ordering.) - Removed 'volatile' from structs' barrier field declarations in bli_thrcomm_*.h. - Updated bli_thrcomm_pthread.? files to use renamed struct barrier fields consistent with that of the _openmp.? files. - Updated other bli_thrcomm_* files to rename "communicator" variables to simply "comm". commit 0e58ba1b3aa84700ca51a96f1c0eed6067562fba Author: Field G. Van Zee Date: Mon Jul 17 19:03:22 2017 -0500 Added API to set mt environment variables. Details: - Renamed bli_env_get_nway() -> bli_thread_get_env(). - Added bli_thread_set_env() to allow setting environment variables pertaining to multithreading, such as BLIS_JC_NT or BLIS_NUM_THREADS. - Added the following convenience wrapper routines: bli_thread_get_jc_nt() bli_thread_get_ic_nt() bli_thread_get_jr_nt() bli_thread_get_ir_nt() bli_thread_get_num_threads() bli_thread_set_jc_nt() bli_thread_set_ic_nt() bli_thread_set_jr_nt() bli_thread_set_ir_nt() bli_thread_set_num_threads() - Added #include "errno.h" to bli_system.h. - This commit addresses issue #140. - Thanks to Chris Goodyer for inspiring these updates. commit 8772a0b33a90154c80d88b381dcdd66f824e041f Author: Marat Dukhan Date: Thu Jul 13 21:39:24 2017 -0700 Fix Emscripten builds commit 72c8b49bb8d3b9370b2cc37718da22f065de9c57 Merge: 70cc825b ba7cada5 Author: Field G. Van Zee Date: Wed Jul 12 14:58:12 2017 -0500 Merge pull request #138 from hominhquan/membrk_set_free_fp Set missing free_fp in bli_membrk_init for free-ing GEN_USE buffers commit ba7cada51a238d320528e3504ed0f0a17a6b022a Author: Minh Quan HO Date: Fri Jul 7 10:52:05 2017 +0200 set missing free_fp in bli_membrk_init for free-ing GEN_USE buffers The membrk's free_fp is called when releasing GEN_USE buffers, but this free_fp is not set in bli_membrk_init commit 1241301869957c96f16a2c6567e3ad70afa547de Merge: 969b67e8 25ead66f Author: Kiran Varaganti Date: Wed Jul 5 02:24:00 2017 -0400 Merge "Reducing the framework overhead of GEMV routines" into amd-staging commit 25ead66fb78557f73af48bac305724d5d8aa3309 Author: sthangar Date: Fri Jun 30 12:23:19 2017 +0530 Reducing the framework overhead of GEMV routines Change-Id: I83607ad767bff74e305e915b54b0ea34ec3e5684 commit 969b67e8800fbd5d14a086606f3b5afbf66ed093 Author: Kiran Varaganti Date: Tue Jul 4 12:57:32 2017 +0530 Improved efficiency of dGEMM for large matrices by reducing TLB load misses and majorly L3 cache misses. This is achieved by changing the packed block sizes of matrix A & B. Now the optimum values are MC_D = 510 and KC_D = 1024. Change-Id: I2d8bdd5f62f2d1f8782ae2997f3d7a26587d1ca4 commit 70cc825b552dec05165b9d70f9e6eb33d8abb118 Author: Devin Matthews Date: Tue Jun 6 21:58:21 2017 -0500 Update LICENSE Remove totally unnecessary first 9 lines and hopefully get Github to recognize it as 3BSD [ci skip]. commit cf54c77bc79a0f33a514be72c80a654c4e6e6f63 Author: Devin Matthews Date: Tue Jun 6 20:23:17 2017 -0500 Add new SSI acknowledgment commit d6ef56c6dbaf6df8ee1af1ca6a0f0792a811396a Author: prangana Date: Thu Jun 1 16:11:09 2017 +0530 Update version number Change-Id: Ib6e52d1d34c0791367ab9152dfab31f94deedeb4 commit 897bfa0e92082c30bbb74229562d7d7327cbbac8 Author: prangana Date: Thu Jun 1 16:11:09 2017 +0530 Update version number Change-Id: Ib6e52d1d34c0791367ab9152dfab31f94deedeb4 commit 99d0ba5606d4b63e6a9c639aa78d4defc2455f79 Merge: be2c7eb8 6d17e012 Author: Santanu Thangaraj Date: Thu Jun 1 02:19:02 2017 -0400 Merge "Checked in the small matrix code to compute GEMM called with A transpose case" into amd-staging commit 6d17e0120fe5c127b941136ad2c0c08e91439535 Author: sthangar Date: Wed May 24 11:48:16 2017 +0530 Checked in the small matrix code to compute GEMM called with A transpose case Change-Id: I29f40046d43d7a4b037c1cb322503ee26495f462 commit 9d93f8481a1404695f7b78a3ced8ca47e890b649 Author: prangana Date: Tue May 30 09:58:10 2017 +0530 Update Licence File Change-Id: I4c5cf1690d0cef92a68400f9a89e454ab6856ad2 commit be2c7eb85168937bd4318f4d05ded37620119310 Author: prangana Date: Tue May 30 09:58:10 2017 +0530 Update Licence File Change-Id: I4c5cf1690d0cef92a68400f9a89e454ab6856ad2 commit 7f41bb0a0becde6a7de7df0f99668d7b4686c3b0 Author: Devin Matthews Date: Fri May 26 14:49:31 2017 -0400 PACKDIM_MR=8 didn't work out, but messing with the prefetching helps 2%. commit d87614af3f3d9187be94d6e77984b282bf890928 Author: Devin Matthews Date: Fri May 26 14:47:36 2017 -0400 Revert "Change PACKDIM_MR (double) for haswell to 8." This reverts commit 681eec913d7c2ebcff637cec5c1627ced9a92b99. commit 681eec913d7c2ebcff637cec5c1627ced9a92b99 Author: Devin Matthews Date: Fri May 26 12:28:09 2017 -0500 Change PACKDIM_MR (double) for haswell to 8. commit 0a3ae0ecaa0ddcb5887005d7051fa234499f1120 Merge: 0f4e6652 6e04f9df Author: praveeng Date: Sat May 20 16:53:50 2017 +0530 frame/3/gemm/bli_gemm_front.c Change-Id: I52a0fbc1d33bb948d430942323bbc5fe44e3ca13 commit 6e04f9df01d79c1b0e673943ca0d5d0a6095eb2e Author: Field G. Van Zee Date: Wed May 17 13:03:52 2017 -0500 Restored deleted lines from makefile fragments. commit ec5c0c0448275280dca0991f6f33afeb73650450 Author: Devin Matthews Date: Wed May 17 12:29:44 2017 -0500 Change to /bin/sh. All scripts checked with Debian's checkbashisms. Also check for clang first in auto-detect.sh. commit 555ddc30d4c7e44f3f335e436c98606f56e1598b Author: Devin Matthews Date: Wed May 17 12:27:14 2017 -0500 Remove shebangs from makefiles. commit f26bd7f42e0c2a47fe321b2c452644990b689654 Merge: cbf8710a 169fb05f Author: Devin Matthews Date: Wed May 17 11:58:41 2017 -0500 Merge pull request #128 from iotamudelta/master Portability and clang commit 169fb05f225c2f060265bcaa872f7f80dc638b70 Author: J M Dieterich Date: Tue May 16 23:11:22 2017 -0400 Fix if/else structure. Thanks to TravisCI. commit 0579dfea0bcfbb90ebc073fcf78b92a5cf7238e1 Author: J M Dieterich Date: Tue May 16 22:58:07 2017 -0400 Restore version. commit a75b05c23dc786a1fdc45dc1627a5ce2299f1a7b Author: J M Dieterich Date: Tue May 16 22:23:27 2017 -0400 Mark piledriver compilable w/ clang. commit 7541d46e2ba8659bb2e36b444edef112fefa1345 Author: J M Dieterich Date: Tue May 16 22:12:12 2017 -0400 Mark bulldozer compilable w/ clang. commit 91f897073ec0df3330ede449c4d6af8158266ae3 Author: J M Dieterich Date: Tue May 16 22:06:59 2017 -0400 Correct error message. commit f5131e1e49167f948bddd714bb1af1761829c212 Author: J M Dieterich Date: Tue May 16 22:03:23 2017 -0400 Indeed once can compile for carrizo also using clang. commit 5fa4e9439c04f35f89dd7d26ff742cb2dadc3180 Author: J M Dieterich Date: Tue May 16 21:50:49 2017 -0400 A bunch of shebang fixes from unportable /bin/bash to portable /usr/bin/env bash commit 1f3a58197e5d5f9ac862bda91e7527cbfbab5d76 Author: Field G. Van Zee Date: Mon May 8 16:10:03 2017 -0500 Housekeeping, induced method file/function renames. Details: - Renamed all level-3 induced method files to use the "_vir.c" suffix instead of "_ref.c". Also renamed functions within these files accordingly. - Renamed cpp macro definitions in frame/ind/include according to the above changes. - Removed frame/3/old. commit cbf8710a1ba63e25aadaa6fc5da51ea81b3d596d Merge: cf39d3ef fdc66f12 Author: Tyler Michael Smith Date: Mon May 8 11:21:20 2017 -0500 Merge pull request #127 from devinamatthews/fix_blis_nt_xx Setting any one of BLIS_NT_[IJ][CR] overrides BLIS_NUM_THEADS commit cf39d3ef3b29b8058c39fb4638c1a734fe64aaed Author: Field G. Van Zee Date: Fri May 5 15:06:56 2017 -0500 Fixed a bug in norm1v, norm1m. Details: - Fixed a bug that manifested as improperly-computed 1-norm for vectors and matrices. This is one of the few operations in BLIS that does not have its own test module within the testsuite, hence why it went undetected for so long. The bad 1-norms were being used to normalize matrices in the testsuite after initialization, which led to some matrices containing a combination of "large" and "small" values. This tended to push the residuals computed after each test away from zero. In some cases, they were off *just* enough to the testsuite to label it a "failure". Many thanks to Jeff Hammond for reporting this bug. (Wonky details: the bug was due to improperly-defined level-0 scalar macros for abval2, an operation that computes the absolute square, or complex magnitude/modulus. Certain complex domain instances of abval2 were being incorrectly defined in terms of real-only solutions, leading to bad results. This level-0 operation forms the basis of norm1v/norm1m. absq2 was also affected, but almost nothing uses this operation.) commit 799485124f4d823e908d2e5d38b0c3a1e6172ade Merge: 773a24ef 0df3541f Author: Devin Matthews Date: Thu May 4 10:52:09 2017 -0500 Merge pull request #121 from jeffhammond/not-real-knl allow KNL build without hbwmalloc (i.e. emulated) commit fdc66f12d40754ff46179804bff592fddafbca02 Author: Devin Matthews Date: Thu May 4 10:35:22 2017 -0500 Setting any one of BLIS_NT_[IJ][CR] overrides BLIS_NUM_THEADS. Missing BLIS_NT_XX's are defaulted to 1. Fixes #123. commit 773a24efb2fa1c3a220bf0ce1dd621a3176196da Merge: dd58c954 b8854259 Author: Field G. Van Zee Date: Wed May 3 15:07:59 2017 -0500 Merge branch 'master' of github.com:flame/blis commit dd58c9545c877c3f7553eaebca7b5e9720a66f5d Author: Field G. Van Zee Date: Wed May 3 15:04:51 2017 -0500 Disable complex 3m/4m in testsuite by default. Details: - Disabled testsuite tests of all level-3 implementations based on 3m and 4m. This will improve testing runtime on Travis CI as well as for anyone manually running the testsuite using default test parameters. Thanks to Devin Matthews for suggesting this change. commit 0df3541f54b7fe0c604ab2ec47ba814f12391798 Author: Jeff Hammond Date: Tue May 2 19:25:21 2017 -0700 allow KNL build without hbwmalloc.h (i.e. emulated) we want to be able to run BLIS KNL binaries on non-KNL machines via SDE. although it is possible to install hbwmalloc implementation on such systems, it is easier not to, since obviously the performance of SDE execution is not representative so there is no reason to emulate HBW allocation. commit b88542591d4dd0cde366e5ae35afd3205cb81bdc Merge: 43007f7b c2c91e09 Author: Field G. Van Zee Date: Tue May 2 19:22:41 2017 -0500 Merge pull request #107 from jeffhammond/intel-compilers-no-use-libm never use libm with Intel compilers commit 43007f7b65ec7926cbbfc39965ff733fa251c15f Author: Field G. Van Zee Date: Tue May 2 16:48:43 2017 -0500 Fixed stray parentheses in README citations. commit a4f1d0b8801c114e9ef8be39df01e1b8d27ebcb3 Author: Field G. Van Zee Date: Tue May 2 16:38:43 2017 -0500 CHANGELOG update (0.2.2) commit 940a707ac78de975110e17c95765e65b89aa5e10 Author: Field G. Van Zee Date: Tue May 2 16:38:42 2017 -0500 Version file update (0.2.2) commit d5a5e003ea9b24bb6abf12e88862e8eb61ffb03d Author: Field G. Van Zee Date: Tue May 2 15:48:30 2017 -0500 Fixed a trsm1m bug that affected right-side cases. Details: - Fixed a bug introduced in 1c732d3 that affected trsm1m_r. The result was nondeterministic behavior (usually segmentation faults) for certain problem sizes beyond the 1m instance of kc (e.g. 128 on haswell). The cause of the bug was my commenting out lines in bli_gemm1m_ukr_ref.c which explicitly directed the virtual gemm micro-kernel to use temporary space if the storage preference of the [real domain] gemm ukernel did not match the storage of the output matrix C. In the context of gemm, this handling is not needed because agreement between the storage pref and the matrix is guaranteed by a high-level optimization in BLIS. However, this optimization is not applied to trsm because the storage of C is not necessarily the same as the storage of the micro-panels of B--both of which are updated by the micro-kernel during a trsm operation. Thus, the guarantee of storage/preference agreement is not in place for trsm, which means we must handle that case within the virtual gemm micro-kernel. - Comment updates and a minor macro change to bli_trsm*_cntx_init() for 3m1, 4m1a, and 1m. commit e80993e71f4d571e9650a8e90ed386e32059eae5 Merge: a509fbd5 ca3a7924 Author: Field G. Van Zee Date: Tue May 2 12:30:28 2017 -0500 Merge branch 'master' into 1m commit ca3a7924770d6cf203cce4ca9f5482e1d0d4e961 Author: Field G. Van Zee Date: Tue May 2 12:09:39 2017 -0500 README.md update. Details: - Updated bibtex entries for 4th BLIS paper, and adds entries for 5th and 6th BLIS papers. commit 0f4e6652dfe9b30105d3bab328ac26d9d5c11182 Merge: 42e7f6fb 6e7de6ef Author: praveeng Date: Wed Apr 19 17:54:10 2017 +0530 Merge master code till 2017_04_19 to amd-staging Change-Id: Ibebe83c8ea2e7eb15798c2bcf214b7228a1c9518 commit 42e7f6fb2a531429ee600b2fe0293b67371c7ccb Author: sthangar Date: Tue Mar 28 18:10:03 2017 +0530 fixed license attribute issues in AMD added files Change-Id: I303f870a777c7cd1c1af29ea0b93f3e0a27948e4 commit 5600001e973c6cea048bd3fdb28117f1d7c98b9d Merge: 0b190293 b3ed4933 Author: prangana Date: Mon Mar 20 13:56:33 2017 +0530 Fix merge conflicts after sync with release branch Change-Id: Icf14a09f728befb69a73fff9fa79c4128e728310 commit 6e7de6ef84babb273dc5528a9b9d01f0febe394b Author: Field G. Van Zee Date: Fri Mar 17 12:10:24 2017 -0500 Minor updates to test/3m4m. Details: - Updated initial problem size and increment in Makefile. - Updated code in test_gemm.c to correctly query kc from context. commit f484c6cd4389dc7ae5b972849e12e98ad5bbf9a4 Author: Field G. Van Zee Date: Fri Mar 17 12:07:27 2017 -0500 Whitespace reformatting to armv8a kernels file. Details: - Updated formatting of function signature/header in kernels/armv8a/3/bli_gemm_opt_4x4.c. commit 0b19029342ffc530fa22ef20398a26221cb8f6ec Author: Kiran Varaganti Date: Tue Mar 14 14:51:31 2017 +0530 Code cleanup, removed warnings from trsm, removed unused routines in axpyv & scalv Change-Id: I02867f394c5f416194c4b1769a6c75f39243ec81 commit 825363bd2a5a60a923d4a6d9691dc143845a9cab Merge: 093bdb80 513944e4 Author: praveeng Date: Wed Mar 8 15:42:49 2017 +0530 Merge code from master to amd-staging as on 2017_03_08 by praveeng Change-Id: I80740081b2cb54c9b77a3e78b9fe540e170be23d commit 093bdb80c86b06367e595aa17487139ae983822f Author: sthangar Date: Tue Mar 7 13:35:50 2017 +0530 Checked in Unpacked DGEMM code Change-Id: I39dcc7b238b328f73ee2675d21a5e521d0488723 commit 33923da9a108854590d386e74b6ee66b971e7796 Author: Kiran Varaganti Date: Mon Mar 6 14:31:31 2017 +0530 Added variant 10 for double precision axpyv microkernel Change-Id: I7a20cc113a422603250bc450825c965136354974 commit bc828f7f8e3ddb9f58af07edc0b935b21759fb0f Author: Kiran Varaganti Date: Fri Mar 3 14:45:35 2017 +0530 Added new axpyv (single precision) microkernel where it performs 10 FMAs per loop- This gives better performance than all other implementations of axpyv Change-Id: Ic4f0e4c67e367d67d0b24febcf34f81a70a39972 commit c9949f4603419267c10973adf1d63ec38497475d Author: sthangar Date: Fri Feb 17 14:16:33 2017 +0530 Checked in DGEMMTRSM and edge case handling routine in DDOTXF Change-Id: I65f00661af6c09b2507294fd43e0a10641c0597e commit a509fbd5ac04fafd4e51b43d2f59ca56432dc212 Merge: 69b4846a 513944e4 Author: Field G. Van Zee Date: Tue Feb 21 17:06:16 2017 -0600 Merge branch 'master' into 1m commit 69b4846ae9adb157c4171b52e159684db2867853 Author: Field G. Van Zee Date: Tue Feb 21 15:33:39 2017 -0600 Disabled experiment-related 1m code. Details: - Commented out code in frame/ind/oapi/bli_l3_3m4m1m_oapi.c that was specifically inserted to facilitate the benchmarking of 1m block-panel and panel-block algorithms. - Updates to test/3m4m/Makefile, runme.sh script, and test_gemm.c to reflect changes used/needed during benchmarking. commit 513944e4a951d8823b4de161b86ad7a965b4d99b Merge: 8b462a0e 0e18f68c Author: Devin Matthews Date: Mon Feb 20 10:04:33 2017 -0500 Merge pull request #118 from devinamatthews/master Handle k=0 correctly in KNL dgemm ukernel. commit 0e18f68cf12eb9189ba901a20040b1cdae417670 Author: Devin Matthews Date: Mon Feb 20 09:03:21 2017 -0600 Handle k=0 correctly in KNL dgemm ukernel. commit 8b462a0e8c3e9252f0401940849e53cc772256fa Merge: c362afc5 7d42fc07 Author: Devin Matthews Date: Sun Feb 19 23:03:03 2017 -0500 Merge pull request #117 from devinamatthews/master Cast dim_t and inc_t parameters to 64-bit in KNL microkernels. commit 7d42fc0796ef0c010375fd8e59b1240ba41ce4d2 Author: Devin Matthews Date: Sun Feb 19 21:10:55 2017 -0500 Cast dim_t and inc_t parameters to 64-bit in KNL microkernels. commit 04245c9ff7f8b3c70d61003029c964bb9a4320ee Author: Kiran Varaganti Date: Fri Feb 10 14:24:30 2017 +0530 Reoptimized scalv routines - two vector multiplies are done per iteration, and these routines are enabled in bli_kernel.h Change-Id: Ic5654508573d1f6bde2edef06aefe117e581feb5 commit c362afc525bab4050581d1b0fcea2fe4d582c608 Author: Field G. Van Zee Date: Thu Feb 9 11:54:59 2017 -0600 Added missing "level-0" BLAS [sd]cabs1_(). Details: - Fixed issue #115 by adding implementations for scabs1_() and dcabs1_() to the BLAS compatibility layer. Thanks to heroxbd for pointing out their absence. commit 018180c938c32efbeaaf626ba71ec5b780664db1 Author: Field G. Van Zee Date: Wed Feb 8 11:20:52 2017 -0600 Fixed a minor bug in configure (issue #114). Details: - Fixed a bug in the configure script whereby a non-preferred value for --enable-threading would cause problems in common.mk vis-a-vis detecting which threading model was chosen. Thanks to heroxbd for reporting this issue. commit 58b5b77e5fdb179ea465e398e416e6a00d917e05 Author: Kiran Varaganti Date: Wed Feb 8 21:43:34 2017 +0530 Fixed a bug in axpyv, the arguments passed to intrinsic fmad instruction are corrected Change-Id: If12f24c6bc74b22ac9e4acd6b9378e06d79f2f5e commit 85de4ebf74d0a5587d5a12724eb5489d51674db3 Author: Kiran Varaganti Date: Wed Feb 8 14:41:04 2017 +0530 variant 4 axpyv single precision modified: explicitly used FMA intrinsics, replaced vector multiply and add operations Change-Id: I975feef56696d479d2b9e9441b0660021cf4f6ff commit 3fa53e8af31d634779f40258c51483ae8af494fa Merge: b5291a44 95be7b04 Author: Kiran Varaganti Date: Wed Feb 8 11:46:34 2017 +0530 Merged axpyv and gemm small in bli_kernel.h Merge branch 'amd-staging' of ssh://git.amd.com:29418/cpulibraries/er/blis into amd-staging modified: config/zen/bli_kernel.h modified: frame/3/gemm/bli_gemm_front.c modified: kernels/x86_64/zen/3/bli_gemm_small_matrix.c Change-Id: If181cf9345178c448b3530beb8bef453917fe295 commit 95be7b04709e688a4cb01fba680081e30f4258ef Author: sthangar Date: Tue Feb 7 14:01:27 2017 +0530 Added logic for packing matrix A and prefetching matrix C in Unpacked SGEMM code Change-Id: I99efeca9eb5b4449286ec0ec133fd554ef1bb4f0 commit b5291a445b1313e01f1e0e8102c5f3660ab07f69 Author: Kiran Varaganti Date: Tue Feb 7 12:39:31 2017 +0530 Added optimization variant 4 for axpyv single precision - this performs 5 FMA per loop, keeping the IPC always full Change-Id: Ie77ed22584271136a257e673bcd3b1ba71136bc9 commit f4bfc1662af82aa4b98185334c44835e51f1cbec Author: Kiran Varaganti Date: Mon Feb 6 15:04:27 2017 +0530 New routines implemented for axpyv to improve performance for small vector sizes, vectorization is done for vectors as small as 8 (single precision) 4(double precision), since this operation has low compute to memory ratio, higher matrix sizes memory operations are dominating and hence not much gain - This still needs some work- added saxpyv and daxpyv var 3 routines in the file bli_axpyv_opt_var1.c Change-Id: Ic1b33bd5516e10113b00e44ab41b97eb19d46072 commit ddf45e71770c55ea4a58ca24ea4913fe5d8beb9b Merge: a6ab91bc 78e1b16e Author: Devin Matthews Date: Fri Jan 27 14:25:40 2017 -0600 Merge pull request #113 from devinamatthews/knl_thread_params Change default threading parameters for KNL. commit 78e1b16e16d589ed31b2e712115ee282097f114d Author: Devin Matthews Date: Fri Jan 27 14:22:20 2017 -0600 Change default threading parameters for KNL. commit 574472ba5a89924eca7dbd10055d0e1dcd7f4c71 Author: sthangar Date: Tue Jan 10 14:51:46 2017 +0530 checked in unpacked SGEMM optimization Change-Id: I8e4ea374415c0c402c660b656fb076af15354181 commit 1c732d3ddc4ac0861d3b0e0dd15eb7e071615502 Author: Field G. Van Zee Date: Wed Jan 25 16:25:46 2017 -0600 Added 1m-specific APIs for bp, pb gemm algorithms. Details: - Defined bli_gemmbp_cntl_create(), bli_gemmpb_cntl_create(), with the body of bli_gemm_cntl_create() replaced with a call to the former. - Defined bli_cntl_free_w_thrinfo(), bli_cntl_free_wo_thrinfo(). Now, bli_cntl_free() can check if the thread parameter is NULL, and if so, call the latter, and otherwise call the former. - Defined bli_gemm1mbp_cntx_init(), bli_gemm1mpb_cntx_init(), both in terms of bli_gemm1mxx_cntx_init(), which behaves the same as bli_gemm1m_cntx_init() did before, except that an extra bool parameter (is_pb) is used to support both bp and pb algorithms (including to support the anti-preference field described below). - Added support for "anti-preference" in context. The anti_pref field, when true, will toggle the boolean return value of routines such as bli_cntx_l3_ukr_eff_prefers_storage_of(), which has the net effect of causing BLIS to transpose the operation to achieve disagreement (rather than agreement) between the storage of C and the micro-kernel output preference. This disagreement is needed for panel-block implementations, since they induce a transposition of the suboperation immediately before the macro-kernel is called, which changes the apparent storage of C. For now, anti-preference is used only with the pb algorithm for 1m (and not with any other non-1m implementation). - Defined new functions, bli_cntx_l3_ukr_eff_prefers_storage_of() bli_cntx_l3_ukr_eff_dislikes_storage_of() bli_cntx_l3_nat_ukr_eff_prefers_storage_of() bli_cntx_l3_nat_ukr_eff_dislikes_storage_of() which are identical to their non-"eff" (effectively) counterparts except that they take the anti-preference field of the context into account. - Explicitly initialize the anti-pref field to FALSE in bli_gks_cntx_set_l3_nat_ukr_prefs(). - Added bli_gemm_ker_var1.c, which implements a panel-block macro-kernel in terms of the existing block-panel macro-kernel _ker_var2(). This technique requires inducing transposes on all operands and swapping the A and B. - Changed bli_obj_induce_trans() macro so that pack-related fields are also changed to reflect the induced transposition. - Added a temporary hack to bli_l3_3m4m1m_oapi.c that allows us to easily specify the 1m algorithm (block-panel or panel-block). - Renamed the following cntx_t-related macros: bli_cntx_get_pack_schema_a() -> bli_cntx_get_pack_schema_a_block() bli_cntx_get_pack_schema_b() -> bli_cntx_get_pack_schema_b_panel() bli_cntx_get_pack_schema_c() -> bli_cntx_get_pack_schema_c_panel() and updated all instantiations. Also updated the field names in the cntx_t struct. - Comment updates. commit 41595e98eedaf3f1f93802c14dcae490402f933f Merge: d625c49e a6ab91bc Author: praveeng Date: Wed Dec 7 15:13:21 2016 +0530 Merge master code as on 2016_12_07 to amd-staging Change-Id: I5d9ecef9bff960aeb9b51ca4e4b21714e789e44f commit d625c49e20bd3c50d6d44e330e34076cced114a3 Author: sthangar Date: Tue Nov 29 15:05:19 2016 +0530 checked-in SGEMMTRSM microkernel for Zen Change-Id: Ib61936418dea911b2154aa99f703b66e9669f94f commit a6ab91bc61432490fadf18d596de4589645f37dd Merge: 145a551d 7f31a630 Author: Field G. Van Zee Date: Wed Nov 30 09:26:58 2016 -0600 Merge pull request #111 from figual/master Fixed missing cntx argument in ARMv8 microkernels. commit 7f31a6307b7bd35f913c895947552c3a176f789b Author: Francisco Igual Date: Sun Nov 27 14:40:47 2016 +0100 Fixed missing cntx argument in ARMv8 microkernels. commit 126482a3b609b9ad7026ba348f6c4bf6a29be8a1 Author: Field G. Van Zee Date: Fri Nov 25 18:29:49 2016 -0600 Implemented the 1m method. Details: - Implemented the 1m method for inducing complex domain matrix multiplication. 1m support has been added to all level-3 operations, including trsm, and is now the default induced method when native complex domain gemm microkernels are omitted from the configuration. - Updated _cntx_init() operations to take a datatype parameter. This was needed for the corresponding function for 1m (because 1m requires us to choose between column-oriented or row-oriented execution, which requires us to query the context for the storage preference of the gemm microkernel, which requires knowing the datatype) but I decided that it made sense for consistency to add the parameter to all other cntx initialization functions as well, even though those functions don't use the parameter. - Updated bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs() to take a second scalar for each blocksize entry. The semantic meaning of the two scalars now is that the first will scale the default blocksize while the second will scale the maximum blocksize. This allows scaling the two independently, and was needed to support 1m, which requires scaling for a register blocksize but not the register storage blocksize (ie: "packdim") analogue. - Deprecated bli_blksz_reduce_dt_to() and defined two new functions, bli_blksz_reduce_def_to() and bli_blksz_reduce_max_to(), for reducing default and maximum blocksizes to some desired blocksize multiple. These functions are needed in the updated definitions of bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs(). - Added support for the 1e and 1r packing schemas to packm, including 1e/1r packing kernels. - Added a minor optimization to bli_gemm_ker_var2() that allows, under certain circumstances (specifically, real domain beta and row- or column-stored matrix C), the real domain macrokernel and microkernel to be called directly, rather than using the virtual microkernel via the complex domain macrokernel, which carries a slight additional amount of overhead. - Added 1m support to the testsuite. - Added 1m support to Makefile and runme.sh in test/3m4m. Also simplified some code in test_gemm.c driver. commit d8f13beeea90338e0ecb0a3aeaa2d59d8ebd6c36 Merge: c25a9205 145a551d Author: praveeng Date: Fri Nov 25 17:31:08 2016 +0530 Merge master code till 2016_11_25 to amd-staging commit c25a9205fd8c8d8de7fd81b1e5621e7ac79f4e87 Merge: 65298762 bdc0a264 Author: praveeng Date: Fri Nov 25 17:06:36 2016 +0530 Merge master code till Switched to simpler trsm_r 2016_11_25 to amd-staging Change-Id: Ibf71d224d8fb6cf0bc497f84d50c27d276512cc1 commit 145a551d524ae5492667a05fc248923d922df850 Author: Field G. Van Zee Date: Wed Nov 23 17:59:06 2016 -0600 Switched to simpler trsm_r implementation. Details: - Disabled the implementation of trsm_r that allows the right-hand matrix B to be trianglar, and switched to the implementation that simply transposes the operation (and thus the storage of C) in order to recast the operation as trsm_l. This avoids the need to use trsm_rl and trsm_ru macrokernels, which require an awkward swapping of MR and NR. For now, the support for trsm_r macrokernels, via separate control trees, remains. - Modified bli_config_macro_defs.h so that BLIS_RELAX_MCNR_NCMR_CONSTRAINTS is defined by default. This is mostly a safety precaution in case someone tries to switch back to the previous trsm_r implementation, but also serves as a convenience on some systems where one does not naturally choose blocksizes in a way that satisfies MC % NR = 0 and NC % MR = 0. commit b3e58ee30307cf1e11529f2113acb9abbeda25af Author: Field G. Van Zee Date: Wed Nov 23 17:58:26 2016 -0600 Reimplemented 4x12 haswell ukernels (real only). Details: - Replaced permutation-based implementations in bli_gemm_asm_d4x12.c, which defines 4x24 single real and 4x12 double real gemm microkernels, with broadcast-based implementations. (The previous microkernel file has been moved to an 'old' subdirectory.) commit 65298762ff15c45e8588e0c279a9feaa98c927a0 Author: sthangar Date: Tue Nov 22 12:15:33 2016 +0530 removed a redundant copy operation in DNRM2 Change-Id: I673b08efde4480e871779716f7715566740ad9ce commit d6863e851adeef037e4d1476fe63bb293fb9d987 Author: sthangar Date: Mon Nov 21 11:30:30 2016 +0530 checked-in DNRM2 optimizations Change-Id: I3b31d768bd7f4fbf43042aa5a0762995c73c4522 commit bdc0a264d2fb5940bfd09298b1de823674a39053 Author: Field G. Van Zee Date: Wed Nov 16 14:13:08 2016 -0600 Adjusted stride selection of ct in macrokernels. Details: - Updated the changes introduced in 618f433 so that the strides of the temporary microtile ct used in the macrokernels is determined based on the storage preference of the microkernel (via the new functions below), rather than the strides of c. In almost all cases, presently, this change results in no net effect, as a high-level optimization in the _front() functions aligns the storage of c to that of the microkernel's preference. However, I encountered some cases where this is not always the case in some development code that has yet to be committed, and therefore I'm generalizing the framework code in advance. - Defined two new functions in bli_cntx.c: bli_cntx_l3_ukr_prefers_rows_dt() bli_cntx_l3_ukr_prefers_cols_dt() which return bool_t's based on the current micro-kernel's storage preferences. For induced methods, the preference of the underlying real domain microkernel is returned. - Updated definition of bli_cntx_l3_ukr_dislikes_storage_of(), and by proxy bli_cntx_l3_ukr_prefers_storage_of(), to be in terms of the above functions, rather than querying the preferences of the native microkernel directly (which did the wrong thing for induced methods). commit 031978d2647cf08316858baf29c84ebba9c3133e Author: Field G. Van Zee Date: Wed Nov 16 14:04:33 2016 -0600 Fixed inactive trsm_r blocksize constraint code. Details: - Changed a cpp macro that was meant to prevent using certain trsm_r code if BLIS_RELAX_MCNR_NCMR_CONSTRAINTS was defined. It was actually coded incorrectly at first. I've now fixed its location and changed its consequence to a compile-time #error message. commit 9772218cae57d55c252595b01e3669d8bed84944 Author: sthangar Date: Wed Nov 16 15:19:19 2016 +0530 Added optimized DAMAX routines for Zen Change-Id: I499c0c8f0f4ce6c19235c47b86d5608db6ba50f8 commit 9c448e30174e5eb76a94b43b30819704a5dfcb3f Merge: 998d8240 e35d3c23 Author: Santanu Thangaraj Date: Wed Nov 16 04:18:57 2016 -0500 Merge "Added new optimized micro-kernel for dotxv routine" into amd-staging commit 998d824044adac0d54c921dcd44fb58f3d54aad2 Merge: 0d13e9a4 6b5a4032 Author: praveeng Date: Wed Nov 16 14:22:42 2016 +0530 Merge master code till devinamatthews/omp_num_thrds 2016_11_16 to amd-staging Change-Id: I601ff1d3ec8a680e1be039ffc7b299744e8a27c5 commit 6b5a4032d2e3ed29a272c7f738b7e3ed6657e556 Merge: 3b524a08 a8220e3a Author: Field G. Van Zee Date: Thu Nov 10 15:28:24 2016 -0600 Merge pull request #109 from devinamatthews/omp_num_threads Add automatic loop thread assignment. commit a8220e3a86433b5d76789e32ea7ca014a11b6d17 Author: Devin Matthews Date: Thu Nov 10 14:19:34 2016 -0600 - Fix typo in bli_cntx.c - Bump BLIS_DEFAULT_NR_THREAD_MAX to 4 commit e35d3c23f28784e50ee13d2e77a69d60e0c24c1f Author: Kiran Varaganti Date: Thu Nov 10 14:30:53 2016 +0530 Added new optimized micro-kernel for dotxv routine Change-Id: I2c544e9b25a454d971ad690353502a55cd668391 commit 0d13e9a4f6f2fcda08f205215240cdf86442d6c6 Merge: e044fa62 3b524a08 Author: praveeng Date: Mon Nov 7 14:40:41 2016 +0530 bli_kernel.h Change-Id: I425d089f79497a0de7d1622e829c3ca9edf7f091 commit c05b3862f6241486442b313eff0c8bee7b5e1274 Author: Devin Matthews Date: Fri Nov 4 15:48:02 2016 -0500 Add automatic loop thread assignment. - Number of threads is determined by BLIS_NUM_THREADS or OMP_NUM_THREADS, but can be overridden by BLIS_XX_NT as before. - Threads are assigned to loops (ic, jc, ir, and jc) automatically by weighted partitioning and heuristics, both of which are tunable via bli_kernel.h. - All level-3 BLAS covered. commit 3b524a08e3fb8380e7b8b2ba835312c51a331570 Author: Field G. Van Zee Date: Wed Nov 2 17:45:18 2016 -0500 Consolidated 3m1/4m1 gemmtrsm, trsm ukernel code. Details: - Consolidated the macros that define the lower and upper versions of the gemmtrsm microkernels into a single macro that is instantiated twice. Did this for both 3m1 and 4m1 microkernels. - Consolidated lower and upper versions of the trsm microkernels for 3m1 and 4m1 into single files (each). commit ead231aca635deb3db270f118454e4222c627f31 Merge: d25e6f8b 62987f60 Author: Field G. Van Zee Date: Wed Nov 2 13:03:50 2016 -0500 Merge pull request #108 from devinamatthews/patch-2 Update .travis.yml with additional tests commit 62987f60a6a6ff0a75b31d0404f493593ce35ccc Author: Devin Matthews Date: Wed Nov 2 11:20:37 2016 -0500 Allow KNL to fail commit 8f9010542c751ae3cbfe6121cb011d8985c1e00d Author: Devin Matthews Date: Wed Nov 2 11:18:32 2016 -0500 Fix some problems with OSX builds: - Update CPU detection for Intel archs (esp. Skylake) - Allow clang for the reference config commit d25e6f8b63c57f30b8a67dffbf4995977cf9f235 Author: Field G. Van Zee Date: Tue Nov 1 14:35:15 2016 -0500 Can disable trsm_r-specific blocksize constraints. Details: - Added cpp guards around the constraints in bli_kernel_macro_defs.h that enforce MC % NR = 0 and NC % MR = 0. These constraints are ONLY needed when handling right-side trsm by allowing the matrix on the right (matrix B) to be triangular, because it involves swapping register, but not cache, blocksizes (packing A by NR and B by MR) and then swapping the operands to gemmtrsm just before that kernel is called. It may be useful to disable these constraints if, for example, the developer wishes to test the configuration with a different set of cache blocksizes where only MC % MR = 0 and NC % NR = 0 are enforced. - In summary, #defining BLIS_RELAX_MCNR_NCMR_CONSTRAINTS will bypass the enforcement of MC % NR = 0 and NC % MR = 0. commit 1a67e3688edb073a9d44c160e7b0798e08796b8a Author: Devin Matthews Date: Tue Nov 1 13:53:18 2016 -0500 Bogus commit Need to trigger another Travis build. commit 2cd82d67b372cad1bed50cfd99e524f1f40b4e24 Author: Devin Matthews Date: Tue Nov 1 13:25:50 2016 -0500 Some fixes for .travis.yml - Switch to gcc-5 to support knl - Don't run tests in parallel -- it is super slow. - Use clang on OSX since gcc is only a zombie husk. commit a3db4e6bdfe745083acf704ab0f51f74ea869538 Author: Devin Matthews Date: Tue Nov 1 10:33:18 2016 -0500 Update .travis.yml with additional tests - Test knl configuration (without running of course). - Test openmp and pthreads threading for auto configuration with 4 threads. - Test auto configuration with and without pthreads on OSX. - Also, run make in parallel. I don't know how the `addons:` section works on OSX; hopefully it is just ignored. commit 8a11a2174a1a5b9426f13bbc5338dc86ab138cdd Author: Field G. Van Zee Date: Mon Oct 31 19:07:55 2016 -0500 Updates to non-default haswell microkernels. Details: - Updated s and d microkernels in bli_gemm_asm_d8x6.c to relax alignment constraints. - Added missing c and z microkernels, which are based on the corresponding kernels in the d6x8 set. - This completes the d8x6 set (which may be used for situations when it is desirable to have a microkernel with a column preference). commit 618f4331eba209803ecab99747872eceb1b5f091 Author: Field G. Van Zee Date: Mon Oct 31 14:40:51 2016 -0500 Align strides of ct in macrokernels to that of c. Details: - Previously, rs_ct and cs_ct, the strides of the temporary microtile used primarily in the macrokernels' edge case handling, were unconditionally set to 1 and MR, respectively. However, Devin Matthews noted that this ought to be changed so that the strides of ct were in agreement with the strides of C. (That is, if C was row-stored, then ct should be accessed as by rows as well.) The implicit assumption is that the strides of C have already been adjusted, via induced transposition, if the storage preference of the microkernel is at odds with the storage of C. So, if the microkernel prefers row storage, the macrokernel's interior cases would present row-stored (ideal) microkernel subproblems to the microkernel, but for edge cases, it would still see column-stored subproblems (not ideal). This commit fixes this issue. Thanks to Devin for his suggestion. commit c2c91e09b4893cb81314774557f728a95080f81e Author: Jeff Hammond Date: Tue Oct 25 21:15:26 2016 -0700 never use libm with Intel compilers Intel compilers include a highly optimized math library (libimf) that should be used instead of GNU libm. yes, this change is for ALL targets, including those that are not supported by the Intel compiler. there is no harm in doing this, and it is future-proof in the event that the Intel compilers support other architectures. commit 630391002325a589063aec2ab0a7d89ef2e178c0 Merge: 956b3edf 216206c1 Author: Field G. Van Zee Date: Tue Oct 25 19:34:51 2016 -0500 Merge pull request #105 from devinamatthews/knl Support for Intel Knight's Landing. commit 216206c1d328a865c2192e35a4df6e9aff79a85b Author: Devin Matthews Date: Tue Oct 25 13:56:18 2016 -0500 Fix up for merge to master. commit 11eb7957abbcdf02d5e312898e094260eadb1209 Merge: cd5b6681 956b3edf Author: Devin Matthews Date: Tue Oct 25 13:51:07 2016 -0500 Merge branch 'master' into knl # Conflicts: # frame/thread/bli_thread.h commit cd5b6681838899283cd94e5427dfda206e7fbabe Author: Devin Matthews Date: Tue Oct 25 13:49:27 2016 -0500 Don't use %rbp in KNL packing kernels. commit 956b3edf8eb09480f31f2e861c1b10f9ecbb2e52 Merge: b7e41d71 0662a3c1 Author: Field G. Van Zee Date: Tue Oct 25 13:02:57 2016 -0500 Merge pull request #104 from devinamatthews/misspellings Add flexible options for thread model (pthread/posix for pthreads etc.). commit 0662a3c1b1f4644a86bf8e5073d1391808c91b4a Author: Devin Matthews Date: Tue Oct 25 12:42:44 2016 -0500 Add flexible options for thread model (pthread/posix for pthreads etc.). commit e044fa624008c161de32a39d734cddf1dd22dd41 Author: Kiran Varaganti Date: Tue Oct 25 13:03:05 2016 +0530 Changed double precision trsm kernel macro definition to bli_dtrsm_l_int_6x8 from 6x16 : it fixes the seg fault Change-Id: Ia8c1de5fe13a370d691570a50136d55ffb18908a commit b3ed4933aa0da72ad771fb0fdf1727e5ba9ad7b4 Author: Kiran Varaganti Date: Tue Oct 25 13:03:05 2016 +0530 Changed double precision trsm kernel macro definition to bli_dtrsm_l_int_6x8 from 6x16 : it fixes the seg fault Change-Id: Ia8c1de5fe13a370d691570a50136d55ffb18908a commit b7e41d71b07d2af6d22d632c70e0c5f7ce46852c Merge: 4bd905bd 5117d444 Author: Field G. Van Zee Date: Mon Oct 24 16:47:46 2016 -0500 Merge pull request #103 from devinamatthews/patch-1 Change .align to .p2align in Bulldozer ukernels. commit 5117d444f7f3a2bc327f067926eaf2398212edda Author: Devin Matthews Date: Mon Oct 24 16:20:47 2016 -0500 Change .align to .p2align in Bulldozer ukernels Apparently OSX doesn't allow .align directives for >16B, so I've changed these to their .p2align counterparts. commit 4bd905bd4597e0ad7bedf31e25e779d3e2dfda29 Merge: 936d5fdc 7f32dd57 Author: Field G. Van Zee Date: Fri Oct 21 14:48:44 2016 -0500 Merge pull request #93 from ShadenSmith/config_check Adds sanity check to configuration choice. commit 936d5fdc26c6c4dab199a8d11fde948975cfa1d6 Author: Field G. Van Zee Date: Fri Oct 21 14:34:27 2016 -0500 Fixed multithreading compilation bug in 970745a. Details: - Moved the definition of the cpp macro BLIS_ENABLE_MULTITHREADING from bli_thread.h to bli_config_macro_defs.h. Also moved the sanity check that OpenMP and POSIX threads are not both enabled. - Thanks to Krzysztof Drewniak for reporting this bug. commit d250e6a3af3af8beedcda28f508ac03e94efb3c8 Author: Kiran Varaganti Date: Thu Oct 20 14:34:39 2016 +0530 Merged TRSM and scalv routines into zen folder Change-Id: Ice897bc83e8fb70b90f23cc3ce892c39883aceb9 commit 8feb0f85a674e84bec2417486e3bcea584b14c04 Author: Field G. Van Zee Date: Wed Oct 19 16:05:41 2016 -0500 Removed auto-prototyping of malloc()/free() substitutes. Details: - Removed the header file, bli_malloc_prototypes.h, which automatically generated prototypes for the functions specified by the following cpp macros: BLIS_MALLOC_INTL BLIS_FREE_INTL BLIS_MALLOC_POOL BLIS_FREE_POOL BLIS_MALLOC_USER BLIS_FREE_USER These prototypes were originally provided primarily as a convenience to those developers who specified their own malloc()/free() substitutes for one or more of the following. However, we generated these prototypes regardless, even when the default values (malloc and free) of the macros above were used. A problem arose under certain circumstances (e.g., gcc in C++ mode on Linux with glibc) when including blis.h that stemmed from the "throw" specification which was added to the glibc's malloc() prototype, resulting in a prototype mismatch. Therefore, going forward, developers who specify their own custom malloc()/free() substitutes must also prototype those substitutes via bli_kernel.h. Thanks to Krzysztof Drewniak for reporting this bug, and Devin Matthews for researching the nature and potential solutions. commit 970745a5fc7c29de3e202988e5eb104fabca4fdc Author: Field G. Van Zee Date: Wed Oct 19 15:58:03 2016 -0500 Reorganized typedefs to avoid compiler warnings. Details: - Relocated membrk_t definition from bli_membrk.h to bli_type_defs.h. - Moved #include of bli_malloc.h from blis.h to bli_type_defs.h. - Removed standalone mtx_t and mutex_t typedefs in bli_type_defs.h. - Moved #include of bli_mutex.h from bli_thread.h to bli_typedefs.h. - The redundant typedefs of membrk_t and mtx_t caused a warning on some C compilers. Thanks to Tyler Smith for reporting this issue. commit 1c2f7b57d557c05f5ef6148cccafaf0f70d910da Author: sthangar Date: Tue Oct 18 15:06:35 2016 +0530 Removed symlinks to zen kernels from haswell kernel folder and also modified the bli_kernel.h file accordingly Change-Id: Ib3736af48e851c8243bbe10d937fb942c49ad048 commit d864ea9f4f039fe2b2dc395d0015bd9e8902bc8e Merge: 7045fcbf 28b2af8a Author: praveeng Date: Fri Oct 14 17:00:57 2016 +0530 Merge master code 2016_10_14 till Added disabled code thrinfo_t structures Change-Id: If7db98d286c1471fcd30f00757abee9b253ef987 commit 28b2af8a71133ce68774e153b6e05afb05affba8 Author: Field G. Van Zee Date: Thu Oct 13 14:50:08 2016 -0500 Added disabled code to print thrinfo_t structures. Details: - Added cpp-guarded code to bli_thrcomm_openmp.c that allows a curious developer to print the contents of the thrinfo_t structures of each thread, for verification purposes or just to study the way thread information and communicators are used in BLIS. - Enabled some previously-disabled code in bli_l3_thrinfo.c for freeing an array of thrinfo_t* values that is used in the new, cpp-guarde code mentioned above. - Removed some old commented lines from bli_gemm_front.c. commit 11eed3f683d09e65f721567b346b0f733bff9a64 Author: Field G. Van Zee Date: Thu Oct 13 14:23:23 2016 -0500 Fixed a configure -t omp/openmp bug from fd04869. Details: - Forgot to update certain occurrences of "omp" in common.mk during commit fd04869, which changed the preferred configure option string for enabling OpenMP from "omp" to "openmp". commit 7045fcbf0bd349ebe6cb9ac4508c6a387bb05966 Merge: 7e044900 9cda6057 Author: praveeng Date: Thu Oct 13 12:02:28 2016 +0530 Merge master code 2016_10_13 Removed previously renamed/old files Change-Id: I8106d371afaa0af474a8967388d44481b05de923 commit 7e04490002206d3557fcfb7dd893838a7f36916f Author: sthangar Date: Wed Oct 12 16:43:02 2016 +0530 Checked in the SAMAX optimizations Change-Id: I7faf8c3adf52ff01432188ad3b9866ee4b9a9dfd commit 9cda6057eaa16a24ac8785a9fa167df6c9edba44 Author: Field G. Van Zee Date: Tue Oct 11 13:21:26 2016 -0500 Removed previously renamed/old files. Details: - Removed frame/base/bli_mem.c and frame/include/bli_auxinfo_macro_defs.h, both of which were renamed/removed in 701b9aa. For some reason, these files survived when the compose branch was merged back into master. (Clearly, git's merging algorithm is not perfect.) - Removed frame/base/bli_mem.c.prev (an artifact of the long-ago changed memory allocator that I was keeping around for no particular reason). commit 22377abd84b9e560ffe1c4e4d284eb443ddb7133 Author: Field G. Van Zee Date: Mon Oct 10 13:43:56 2016 -0500 Fixed bli_gemm() segfault on empty C matrices. Details: - Fixed a bug that would manifest in the form of a segmentation fault in bli_cntl_free() when calling any level-3 operation on an empty output matrix (ie: m = n = 0). Specifically, the code previously assumed that the entire control tree was built prior to it being freed. However, if the level-3 operation performs an early exit, the control tree will be incomplete, and this scenario is now handled. Thanks to Elmar Peise for reporting this bug. commit 0b571cd94d9b175331c9453258a6b1389a718ae8 Author: Field G. Van Zee Date: Thu Oct 6 14:48:15 2016 -0500 Fixed segfault in bli_free_align() for NULL ptrs. Details: - Fixed a bug in bli_free_align() caused by failing to handle NULL pointers up-front, which led to performing pointer arithmetic on NULL pointers in order to free the address immediately before the pointer. Thanks to Devin Matthews for reporting this bug. commit cd84fb95182514601d72c78ee0e36a394d0284d7 Author: praveeng Date: Thu Oct 6 15:08:21 2016 +0530 syntax erros in configure file Change-Id: Ibe8a6071aad97df550df64c009fec33a9d8f43a1 commit f2e7ea113aa93b74f1d42408d5db2c5a7b00a653 Merge: 133983c3 86969873 Author: praveeng Date: Thu Oct 6 12:35:30 2016 +0530 conflicts merge for bli_kernel.h Change-Id: I15d846bd34e11f86ebfd7ed091ff671a1f3366a0 commit 133983c36fa01c7acb6d666b3744f77f216314a5 Author: sthangar Date: Thu Oct 6 11:26:22 2016 +0530 code clean up in bli_kernel.h Change-Id: I11d9cdf2af8e8199209eb084f6c3a7c910b83d5d commit 4fb9b4ef2e4cf2626a6e000a41628fb823f16da8 Author: Field G. Van Zee Date: Wed Oct 5 14:41:35 2016 -0500 CHANGELOG update (0.2.1) commit 866b2dde3f41760121115fb25f096d4344e8b4f9 Author: Field G. Van Zee Date: Wed Oct 5 14:41:34 2016 -0500 Version file update (0.2.1) commit 87fddeab3c8a5ccb1bbf02e5f89db1464e459ba9 Merge: 86969873 6f71cd34 Author: Field G. Van Zee Date: Wed Oct 5 13:35:01 2016 -0500 Merge branch 'compose' commit 6f71cd344951854e4cff9ea21bbdfe536e72611d Merge: c0630c40 8d55033c Author: Field G. Van Zee Date: Tue Oct 4 15:53:46 2016 -0500 Merge pull request #94 from flame/distcomm Implemented distributed thrinfo_t management. commit 86969873b5b861966d717d8f9f370af39e3d9de6 Author: Field G. Van Zee Date: Tue Oct 4 14:24:59 2016 -0500 Reclassified amaxv operation as a level-1v kernel. Details: - Moved amaxv from being a utility operation to being a level-1v operation. This includes the establishment of a new amaxv kernel to live beside all of the other level-1v kernels. - Added two new functions to bli_part.c: bli_acquire_mij() bli_acquire_vi() The first acquires a scalar object for the (i,j) element of a matrix, and the second acquires a scalar object for the ith element of a vector. - Added integer support to bli_getsc level-0 operation. This involved adding integer support to the bli_*gets level-0 scalar macros. - Added a new test module to test amaxv as a level-1v operation. The test module works by comparing the value identified by bli_amaxv() to the the value found from a reference-like code local to the test module source file. In other words, it (intentionally) does not guarantee the same index is found; only the same value. This allows for different implementations in the case where a vector contains two or more elements containing exactly the same floating point value (or values, in the case of the complex domain). - Removed the directory frame/include/old/. commit 8d55033c966feed99fcca2a58017c3ab5b1646dc Author: Field G. Van Zee Date: Tue Sep 27 15:20:58 2016 -0500 Implemented distributed thrinfo_t management. Details: - Implemented Ricardo Magana's distributed thread info/communicator management. Rather that fully construct the thrinfo_t structures, from root to leaf, prior to spawning threads, the threads individually construct their thrinfo_t trees (or, chains), and do so incrementally, as needed, reusing the same structure nodes during subsequent blocked variant iterations. This required moving the initial creation of the thrinfo_t structure (now, the root nodes) from the _front() functions to the bli_l3_thread_decorator(). The incremental "growing" of the tree is performed in the internal back-end (ie: _int()) function, and so mostly invisible. Also, the incremental growth of the thrinfo_t tree is done as a function of the current and parent control tree nodes (as well as the parent thrinfo_t node), further reinforcing the parallel relationship between the two data structures. - Removed the "inner" communicator from thrinfo_t structure definition, as well as its id. Changed all APIs accordingly. Renamed bli_thrinfo_needs_free_comms() to bli_thrinfo_needs_free_comm(). - Defined bli_l3_thrinfo_print_paths(), which prints the information in an array of thrinfo_t* structure pointers. (Used only as a debugging/verification tool.) - Deprecated the following thrinfo_t creation functions: bli_packm_thrinfo_create() bli_l3_thrinfo_create() because they are no longer used. bli_thrinfo_create() is now called directly when creating thrinfo_t nodes. commit fd04869ae4d4a3b0ebb9052557c296456bce7c0d Author: Field G. Van Zee Date: Tue Sep 27 14:14:11 2016 -0500 Changed configure's 'omp' threading to 'openmp'. Details: - Changed the configure script so that the expected string argument to the -t (or --enable-threading=) option that enables OpenMP multithreading is 'openmp'. The previous expected string, 'omp', is still supported but should be considered deprecated. commit 9424af87209e4e435e2e742430945152690170b0 Merge: efa7341d c0630c40 Author: Field G. Van Zee Date: Tue Sep 27 12:51:08 2016 -0500 Merge branch 'compose' commit 7f32dd57c6bd41c0704341752842277dd6a4c8eb Author: Shaden Smith Date: Sat Sep 17 11:33:57 2016 -0500 Adds sanity check to configuration choice. commit efa7341df0b0115926aa8a6e8a4ebfb24fdbf11e Merge: 121c39d4 e1453f68 Author: Field G. Van Zee Date: Fri Sep 16 11:01:57 2016 -0500 Merge pull request #92 from ShadenSmith/readme_fix Fixes broken URL in README.md commit e1453f68f6afd90ae9a29b7a5faa46aa79bbf741 Author: Shaden Smith Date: Fri Sep 16 09:29:28 2016 -0500 Fixes broken URL in README.md commit b922d7563422e14c49a4677bc6ae088a408861ed Author: Field G. Van Zee Date: Tue Aug 23 13:38:36 2016 -0500 Avoid compiling BLAS/CBLAS files when disabled. Details: - Updated the top-level Makefile, build/config.mk.in template, and configure script so that object files corresponding to source files belonging to the BLAS compatibility layer are not compiled (or archived) when the compatibility layer is disabled. (Same for CBLAS.) Thanks to Devin Matthews for suggesting this optimization. - Slight change to the way configure handles internal variables. Instead of converting (overwriting) some, such as enable_blas2blis and enable_cblas, from a "yes" or "no" to a "1" or "0" value, the latter are now stored in new variables that live alongside the originals (with the suffix "_01"). This is convenient since some values need to be sed-substituted into the config.mk.in template, which requires "yes" or "no", while some need to be written to the bli_config.h.in template, which requires "0" or "1". Updated BLIS4 TOMS citation in README.md. Added complex gemm micro-kernels for haswell. Details: - Defined cgemm (3x8) and zgemm (3x4) micro-kernels for haswell-based architectures. As with their real domain brethren, these kernels perfer row storage, (though this doesn't affect most users due to high-level optimizations in most level-3 operations that induce a transpose to whatever storage preference the kernel may have). Change-Id: I512ab90784ecbb7cdaee24928d2ccebb544ba5c1 commit 69826110bab2a064ec76457c24843d28f2581281 Merge: 64598ee4 a58dd35e Author: Pradeep Rao Date: Wed Sep 14 03:26:25 2016 -0400 Merge "Implemented trsm single precision for lower triangular matrices, files added bli_trsm_l_int_6x16.cfiles modified bli_kernel.h to enable optimized trsm microkernel and test_trsm.c is modified to test trsm single precision" into amd-staging commit c0630c4024b08750043a2942a3e8a037aa6b6259 Author: Field G. Van Zee Date: Mon Sep 12 13:59:02 2016 -0500 Added debugging printf()'s to bli_l3_thrinfo.c. Details: - Added optional printf() statements to print out thread communicator info as the thrinfo_t structure is built in bli_l3_thrinfo.c. - Minor changes to frame/thread/bli_thrinfo.h. commit 7b3bf1ffcd7160ccbf6c2518af6d88f6742e4977 Merge: 35509818 121c39d4 Author: Field G. Van Zee Date: Tue Sep 6 15:47:13 2016 -0500 Merge branch 'master' into compose commit 121c39d455f2db6f7ce6802ba7f73ad5e088c68c Author: Field G. Van Zee Date: Mon Sep 5 13:11:42 2016 -0500 Added complex gemm micro-kernels for haswell. Details: - Defined cgemm (3x8) and zgemm (3x4) micro-kernels for haswell-based architectures. As with their real domain brethren, these kernels perfer row storage, (though this doesn't affect most users due to high-level optimizations in most level-3 operations that induce a transpose to whatever storage preference the kernel may have). commit 35509818cbea1598b123421f81c42120889a03c3 Author: Field G. Van Zee Date: Wed Aug 31 17:34:15 2016 -0500 Added, moved some thread barriers. Details: - Removed thread barriers from the end of the loop bodies of bli_gemm_blk_var1(), bli_gemm_blk_var2(), bli_trsm_blk_var1(), and bli_trsm_blk_var2(). - Moved the thread barrier at the end of bli_packm_int() to the end of bli_l3_packm(), and added missing barriers to that function. - Removed the no longer necessary (and now incorrect) ochief guard in bli_gemm3m3_packa() on the bli_obj_scalar_reset() on C. - Thanks to Tyler Smith for help with these changes. commit 64598ee4cfb86f64abbd4bcef5a82ba0d5565b67 Author: sthangar Date: Wed Aug 31 12:54:50 2016 +0530 fixed the symlink issue Change-Id: I2186d529f295c576597c189e1ae219bc1a83f955 commit abd61f9fa75d77a96d1491b3e035451ee73238fe Author: Field G. Van Zee Date: Tue Aug 30 12:34:19 2016 -0500 Updated BLIS4 TOMS citation in README.md. commit 8a2373f26ba8fcd5b2d7b2cc72cb8b2e1f841a03 Author: sthangar Date: Mon Aug 29 14:10:45 2016 +0530 Norm 2 optimization Change-Id: Ide9decaccd20bf0ccc32c9abb6556e038dceed2b commit fdc663902347aa252ea88cf09ce24ab748958dff Author: sthangar Date: Mon Aug 29 10:43:38 2016 +0530 Placed 1 and 1f AMD optimized AVX routines under zen folder Change-Id: I26795211ef11d232ed794ce36dd0a9c1f8706328 commit 701b9aa3ff028decbf90efac0dca5bd64fe26269 Author: Field G. Van Zee Date: Fri Aug 26 19:04:45 2016 -0500 Redesigned control tree infrastructure. Details: - Altered control tree node struct definitions so that all nodes have the same struct definition, whose primary fields consist of a blocksize id, a variant function pointer, a pointer to an optional parameter struct, and a pointer to a (single) sub-node. This unified control tree type is now named cntl_t. - Changed the way control tree nodes are connected, and what computation they represent, such that, for example, packing operations are now associated with nodes that are "inline" in the tree, rather than off- shoot braches. The original tree for the classic Goto gemm algorithm was expressed (roughly) as: blk_var2 -> blk_var3 -> blk_var1 -> ker_var2 | | -> packb -> packa and now, the same tree would look like: blk_var2 -> blk_var3 -> packb -> blk_var1 -> packa -> ker_var2 Specifically, the packb and packa nodes perform their respective packing operations and then recurse (without any loop) to a subproblem. This means there are now two kinds of level-3 control tree nodes: partitioning and non-partitioning. The blocked variants are members of the former, because they iteratively partition off submatrices and perform suboperations on those partitions, while the packing variants belong to the latter group. (This change has the effect of allowing greatly simplified initialization of the nodes, which previously involved setting many unused node fields to NULL.) - Changed the way thrinfo_t tree nodes are arranged to mirror the new connective structure of control trees. That is, packm nodes are no longer off-shoot branches of the main algorithmic nodes, but rather connected "inline". - Simplified control tree creation functions. Partitioning nodes are created concisely with just a few fields needing initialization. By contrast, the packing nodes require additional parameters, which are stored in a packm-specific struct that is tracked via the optional parameters pointer within the control tree struct. (This parameter struct must always begin with a uint64_t that contains the byte size of the struct. This allows us to use a generic function to recursively copy control trees.) gemm, herk, and trmm control tree creation continues to be consolidated into a single function, with the operation family being used to select among the parameter-agnostic macro-kernel wrappers. A single routine, bli_cntl_free(), is provided to free control trees recursively, whereby the chief thread within a groups release the blocks associated with mem_t entries back to the memory broker from which they were acquired. - Updated internal back-ends, e.g. bli_gemm_int(), to query and call the function pointer stored in the current control tree node (rather than index into a local function pointer array). Before being invoked, these function pointers are first cast to a gemm_voft (for gemm, herk, or trmm families) or trsm_voft (for trsm family) type, which is defined in frame/3/bli_l3_var_oft.h. - Retired herk and trmm internal back-ends, since all execution now flows through gemm or trsm blocked variants. - Merged forwards- and backwards-moving variants by querying the direction from routines as a function of the variant's matrix operands. gemm and herk always move forward, while trmm and trsm move in a direction that is dependent on which operand (a or b) is triangular. - Added functions bli_thread_get_range_mdim(), bli_thread_get_range_ndim(), each of which takes additional arguments and hides complexity in managing the difference between the way ranges are computed for the four families of operations. - Simplified level-3 blocked variants according to the above changes, so that the only steps taken are: 1. Query partitioning direction (forwards or backwards). 2. Prune unreferenced regions, if they exist. 3. Determine the thread partitioning sub-ranges. 4. Determine the partitioning blocksize (passing in the partitioning direction) 5. Acquire the curren iteration's partitions for the matrices affected by the current variants's partitioning dimension (m, k, n). 6. Call the subproblem. - Instantiate control trees once per thread, per operation invocation. (This is a change from the previous regime in which control trees were treated as stateless objects, initialized with the library, and shared as read-only objects between threads.) This once-per-thread allocation is done primarily to allow threads to use the control tree as as place to cache certain data for use in subsequent loop iterations. Presently, the only application of this caching is a mem_t entry for the packing blocks checked out from the memory broker (allocator). If a non-NULL control tree is passed in by the (expert) user, then the tree is copied by each thread. This is done in bli_l3_thread_decorator(), in bli_thrcomm_*.c. - Added a new field to the context, and opid_t which tracks the "family" of the operation being executed. For example, gemm, hemm, and symm are all part of the gemm family, while herk, syrk, her2k, and syr2k are all part of the herk family. Knowing the operation's family is necessary when conditionally executing the internal (beta) scalar reset on on C in blocked variant 3, which is needed for gemm and herk families, but must not be performed for the trmm family (because beta has only been applied to the current row-panel of C after the first rank-kc iteration). - Reexpressed 3m3 induced method blocked variant in frame/3/gemm/ind to comform with the new control tree design, and renamed the macro- kernel codes corresponding to 3m2 and 4m1b. - Renamed bli_mem.c (and its APIs) to bli_memsys.c, and renamed/relocated bli_mem_macro_defs.h from frame/include to frame/base/bli_mem.h. - Renamed/relocated bli_auxinfo_macro_defs.h from frame/include to frame/base/bli_auxinfo.h. - Fixed a minor bug whereby the storage-to-ukr-preference matching optimization in the various level-3 front-ends was not being applied properly when the context indicated that execution would be via an induced method. (Before, we always checked the native micro-kernel corresponding to the datatype being executed, whereas now we check the native micro-kernel corresponding to the datatype's real projection, since that is the micro-kernel that is actually used by induced methods. - Added an option to the testsuite to skip the testing of native level-3 complex implementations. Previously, it was always tested, provided that the c/z datatypes were enabled. However, some configurations use reference micro-kernels for complex datatypes, and testing these implementations can slow down the testsuite considerably. commit a58dd35ed7b5b77a6b272655d2edd7a822b8fa87 Author: Kiran Varaganti Date: Fri Aug 26 14:55:12 2016 +0530 Implemented trsm single precision for lower triangular matrices, files added bli_trsm_l_int_6x16.cfiles modified bli_kernel.h to enable optimized trsm microkernel and test_trsm.c is modified to test trsm single precision Change-Id: Ibddf989f4aad577e89558673e1038cf6ece654d9 commit 73517f522b69de429dd7f3df60a70c068149ab28 Merge: c6f5c215 50293da3 Author: Field G. Van Zee Date: Tue Aug 23 13:46:59 2016 -0500 Merge branch 'master' into compose commit 50293da38d5f2b7be9bbc94b9e85aacb6a10f672 Author: Field G. Van Zee Date: Tue Aug 23 13:38:36 2016 -0500 Avoid compiling BLAS/CBLAS files when disabled. Details: - Updated the top-level Makefile, build/config.mk.in template, and configure script so that object files corresponding to source files belonging to the BLAS compatibility layer are not compiled (or archived) when the compatibility layer is disabled. (Same for CBLAS.) Thanks to Devin Matthews for suggesting this optimization. - Slight change to the way configure handles internal variables. Instead of converting (overwriting) some, such as enable_blas2blis and enable_cblas, from a "yes" or "no" to a "1" or "0" value, the latter are now stored in new variables that live alongside the originals (with the suffix "_01"). This is convenient since some values need to be sed-substituted into the config.mk.in template, which requires "yes" or "no", while some need to be written to the bli_config.h.in template, which requires "0" or "1". commit 22dd6a353ddb56614309c01533b1a94c9fd32bca Merge: cdfb3c3f f20ed388 Author: praveeng Date: Tue Aug 23 15:15:35 2016 +0530 Merge master code as on 2016_08_23 to amd-staging branch by praveeng Changes to be committed: modified: frame/thread/bli_mutex_openmp.h modified: frame/thread/bli_mutex_pthreads.h Change-Id: Ica522edbb1d0173f53f38d5057b1f7aef73666be commit c6f5c215ee793d03ea834469fc2adc53feaffc42 Merge: d52cb767 16a4c7a8 Author: Field G. Van Zee Date: Mon Aug 22 17:33:02 2016 -0500 Merge branch 'master' into compose commit f20ed3885d628992fab88690f629a5a2bab3eb88 Merge: 02ac597e 4bc842ca Author: praveeng Date: Mon Aug 22 15:27:33 2016 +0530 Merge branch 'master' of https://github.com/clMathLibraries/blis-amd for "Fixed bugs in bli_mutex_init() and friends." commit 02ac597e4b9be2670d9fff65d28552f8e1ec81b3 Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 84e41cc73c9c87ce64582acd4264b8e1b5316482 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 30ccfcee82db93d0109d1571242e2db925e95d0a Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit aeca25cd63fc8971f8fe7809599c57853f976548 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 6b2274864b36fd1019d97bcc4ca6dd7a57ef16d9 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit daa7a9ecb25982f2551adbd95e65f8ba97cfe944 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 5f66a4aa05aeffcb6eb587851d78d9527319466c Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit c6cbd78d2388c08824822b91a1c36ac4349bb67f Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 9219a9060762525f87ebbf556d78fe8621858513 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 728573296efa7cf14d2381570e116509dfe2a240 Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit ad7862e291c240505c733a41d231b1a126ade73c Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit ad4b471a25ce77867295e5529dfc787e7c18b03f Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 55d641363fcd8bdfdabbd7c22822fa2d0b7f3fa6 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit f3b6b15f6d591d323802bd6c81c522a02056506d Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 16a4c7a823d60707ed9272f5d36e5c5d54c0ba4b Author: Field G. Van Zee Date: Fri Aug 19 11:38:36 2016 -0500 Fixed bugs in bli_mutex_init() and friends. Details: - Fixed a couple of bugs that affected OpenMP and POSIX threads configurations that resulted in compiler errors and warnings due to type mismatch, and in the case of pthreads, a missing function argument. The bugs are fairly recent, introduced in a017062. commit c8e4ef93953ba2b79fb7e0973c08469c0e28a2cd Author: Devin Matthews Date: Wed Aug 3 16:13:03 2016 -0500 Add prefetchw to 30x8 kernel. commit 4b5a2f3d6e7ffeb5cc2be8448554f5c2083ad68f Merge: 380736bf 9f52a587 Author: Devin Matthews Date: Wed Aug 3 16:09:51 2016 -0500 Merge remote-tracking branch 'origin/knl' into knl # Conflicts: # kernels/x86_64/knl/3/bli_dgemm_opt_24x8.c commit 380736bfe955efbdd7274c90b6fd635688e83bc4 Author: Devin Matthews Date: Wed Aug 3 16:08:28 2016 -0500 Add (new) 30x8 KNL kernel and fix non-scatter prefetch bug. commit 9f52a587dee855daa73c194e41b6951416544e9a Author: Devin Matthews Date: Wed Aug 3 16:03:53 2016 -0500 Try prefetchw[t1] instead of regular prefetch for C. commit 8945a1512d366bc6a8a85718d12cbf5de6f2898b Author: Devin Matthews Date: Wed Aug 3 11:28:24 2016 -0500 This version gets ~1550 GFLOPs on KNL wuth 16x4. commit cdfb3c3f29d321033fca106aa58ab67ead90a95d Merge: 50a2f2ef 4bc842ca Author: praveeng Date: Fri Jul 29 12:45:04 2016 +0530 Merge master code as on 2016_07_29 to amd-staging branch by praveeng Change-Id: Ic78b84d8b8d10158fb2a612f9a64bbc7b1f9b486 commit 4bc842ca3a64e658c0808bfe4c5693a5ace97923 Merge: 117f8838 b0d510bf Author: praveeng Date: Thu Jul 28 17:32:12 2016 +0530 Merge branch 'master' of publicrepo commit 117f8838511a478aa16137e770d27dd21f4227c5 Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 2fcdc28f1055d385b2e662aa920fb97c472394d7 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 1b5d104afe0628b8b6c0650f1e58cfb08be67004 Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit d81273047bff56501e9413a90991d3d1f8b56a06 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 65905c3011a11cda95761681d4ae84337e46bdb5 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 23cca231be10fe1797aed451bcbc69d38c78bc0c Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 922e3091702f25e3287b417719a33adbd5bbf138 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit b0d510bf0e4dfd177f9e4ae0069f41921e2ecdc1 Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 5ebeece5b4a8df81d59ca7558b278a4263d15128 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 6ce4c022ebdea00c2b951090e3c2e9e88735b9ce Author: Devin Matthews Date: Wed Jul 27 16:26:36 2016 -0500 Switch back to 24x8. I could only squeeze 24.5GFLOP out of 8x24, and scalability is not improved. commit d52cb7671509592a8078729477b40b60380518a2 Merge: 95abea46 c31b1e7b Author: Field G. Van Zee Date: Wed Jul 27 16:04:55 2016 -0500 Merge branch 'master' into compose commit c31b1e7b9d659b96433a87e5aecb90e457a104cc Author: Field G. Van Zee Date: Wed Jul 27 15:58:07 2016 -0500 Relax alignment restrictions for sandybridge ukrs. Details: - Relaxed the base pointer and leading dimension alignment restrictions in the sandybridge gemm microkernels, allowing the use of vmovups/vmovupd instead of vmovaps/vmovapd. These change mimic those made to the haswell microkernels in e0d2fa0 and ee2c139. - Updated testsuite modules as well as standalone test drivers in 'test' directory to use DBL_MAX as the initial time candidate. Thanks to Devin Matthews for suggesting this change. - Inserted #include "float.h" into bli_system.h (to gain access to DBL_MAX). - Minor update (vis-a-vis contexts) to driver code in test/3m4m. commit b8f2b55532849d45d379afbdd05a52ff6100800d Author: Devin Matthews Date: Wed Jul 27 15:22:55 2016 -0500 Try an 8x24 kernel for the hell of it. commit 7ede5863ae3567f7c0852efc2d5cd649ca19e0f3 Author: Devin Matthews Date: Wed Jul 27 13:41:27 2016 -0600 Allocate pack buffer on MCDRAM for KNL. commit ad89ed2e829c7b261d8ba0998a3cb83ad576ee04 Merge: 2c9de740 81e2b05f Author: Devin Matthews Date: Wed Jul 27 11:45:40 2016 -0500 Merge branch 'knl' of github.com:devinamatthews/blis into knl commit 2c9de740edb66c4692c200731763bbd1d3171ccb Author: Devin Matthews Date: Wed Jul 27 11:44:54 2016 -0500 This version gets ~26GF on one core. commit 81e2b05f31bca4e1e1676e7b533d1868d9f9be33 Author: Devin Matthews Date: Wed Jul 27 11:39:05 2016 -0500 Add optimized packing kernels for KNL. commit a7d8ca97b8d835c32d90ff20a565c82733f014a8 Author: Devin Matthews Date: Mon Jul 25 15:15:13 2016 -0500 All fixed. commit 963d0393b023f4134bb0c682923faf9964c0e645 Author: Devin Matthews Date: Mon Jul 25 14:40:53 2016 -0500 Add 24xk pack kernel. commit 117b76739afba481768897d2580f8365d3345417 Author: Devin Matthews Date: Mon Jul 25 13:53:07 2016 -0500 In the midst of debugging. commit 8c0a4fd1d3535d608a9a309a61ffee0a73c3646f Author: Devin Matthews Date: Mon Jul 25 13:09:24 2016 -0500 Fix some row/column confusion. commit c44f9f96930312125b15e64c326ab5ab5cc02633 Author: Devin Matthews Date: Mon Jul 25 12:02:24 2016 -0500 Simplify displacements -- clang assembler was badly botching EVEX compressed displacements giving false alarms for instruction length. commit e0cce177cc1b47ec9f11ac0556241feaa3564df1 Author: Devin Matthews Date: Mon Jul 25 10:02:25 2016 -0500 Minor fixes for 8x24 KNL kernel. commit 50a2f2efcbeb46537f1deaa8e44dc579a4e49eb8 Merge: 1aa77dfc cfd46c88 Author: praveeng Date: Mon Jul 25 17:01:20 2016 +0530 Merge master code as on 2016_07_25 to amd-staging branch by praveeng Change-Id: I84886ae241db2aac0bef6b7ef399f04aa8bca16d commit cfd46c88d59c8f61d5e7cf768d606e4c44623584 Merge: f493bf4d a017062f Author: praveeng Date: Mon Jul 25 15:38:13 2016 +0530 Merge remote-tracking branch 'publicrepo/master' commit f493bf4d704fe0e967783cd6e6877d3302c056a1 Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit 65735bbedf75784c48bd11e05b3fdc98fc66b4bc Author: Devin Matthews Date: Sun Jul 24 21:50:32 2016 -0500 Switch to 24x8 kernel, unrolled by 16. commit 45d5dc97177117220bd9dd0abf85aafc185acad1 Author: Devin Matthews Date: Sun Jul 24 14:25:26 2016 -0500 Add 24x8 "KNC-style" kernel for KNL. commit 95abea46f86816fddfc9ff0abfa52880801461be Merge: d0dfe5b5 a017062f Author: Field G. Van Zee Date: Sat Jul 23 15:38:33 2016 -0500 Merge branch 'master' into compose commit a017062fdf763037da9d971a028bb07d47aa1c8a Author: Field G. Van Zee Date: Fri Jul 22 17:02:59 2016 -0500 Integrated "memory broker" (membrk_t) abstraction. Details: - Integrated a patch originally authored and submitted by Ricardo Magana of HP Enterprise. The changeset inserts use of a new object type, membrk_t, (memory broker) that allows multiple sets of memory pools on, for example, separate NUMA nodes, each of which has a separate memory space. - Added membrk field to cntx_t and defined corresponding accessor macros. - Added membrk field to mem_t object and defined corresponding accessor macros. - Created new bli_membrk.c file, which contains the new memory broker API, including: bli_membrk_init(), bli_membrk_finalize() bli_membrk_acquire_[mv](), bli_membrk_release(), bli_membrk_init_pools(), bli_membrk_reinit_pools(), bli_membrk_finalize_pools(), bli_membrk_pool_size() - In bli_mem.c, changed function calls to bli_mem_init_pools() -> bli_membrk_init() bli_mem_reinit_pools() -> bli_membrk_reinit() bli_mem_finalize_pools() -> bli_membrk_finalize() - In bli_packv_init.c, bli_packm_init.c, changed function calls to: bli_mem_acquire_[mv]() -> bli_membrk_acquire_[mv]() bli_mem_release() -> bli_membrk_release() - Added bli_mutex.c and related files to frame/thread. These files define abstract mutexes (locks) and corresponding APIs for pthreads, openmp, or single-threaded execution. This new API is employed within functions such as bli_membrk_acquire_[mv]() and bli_membrk_release(). commit 8ff2e069c48c12fd06b9c48c6b3aeb4ea9b0e6e1 Author: Devin Matthews Date: Fri Jul 22 16:22:26 2016 -0500 Add 4x unrolled variant for KNL microkernel. commit 9cb2ed9b0c25f31a22c1c9719b062fa665ad7adf Author: Devin Matthews Date: Fri Jul 22 16:10:30 2016 -0500 Git rid of one RBX update. commit 451bde076f0320d60cd2475cfb048ac4a2b798bb Author: Devin Matthews Date: Fri Jul 22 15:43:00 2016 -0500 Add some more knobs to twiddle for KNL microkernel. commit 8c6e621c099521e7a4d87e007bb8224faa5f33a3 Author: Devin Matthews Date: Fri Jul 22 15:05:15 2016 -0500 Make knl conform to new kernel dir structure. commit ce7214c6618d6f22f4ce2ee452336236916d1f30 Merge: 119d0399 ce59f811 Author: Devin Matthews Date: Fri Jul 22 14:59:53 2016 -0500 Merge remote-tracking branch 'origin/master' into knl commit ce59f81108ec9aea918a7e77030da8acfdd397ce Merge: ff41153f 707a2b7f Author: Field G. Van Zee Date: Fri Jul 22 14:48:14 2016 -0500 Merge pull request #88 from devinamatthews/32bit-dim_t Handle 32-bit dim_t in 64-bit microkernels. commit 707a2b7faca137cca7cab7b11a12c44ddaf7ad53 Author: Devin Matthews Date: Fri Jul 22 13:49:44 2016 -0500 Somehow forgot the most important microkernel. commit 47ec045056351ac4f0791c071fa0daaa81699c8c Merge: 08f1d6b6 ff41153f Author: Devin Matthews Date: Fri Jul 22 13:45:23 2016 -0500 Merge remote-tracking branch 'upstream/master' into 32bit-dim_t commit 08f1d6b6fa344275de0f675f69737145ccf6646a Author: Devin Matthews Date: Fri Jul 22 13:44:37 2016 -0500 Use 64-bit intermediate variable for k for architectures that do 64-bit loads in case dim_t is 32-bit. commit ff41153f4eb7f38ed94bdd9a3fd81fb979f3f401 Merge: f9214ced e0d2fa0d Author: Field G. Van Zee Date: Fri Jul 22 13:21:03 2016 -0500 Merge pull request #86 from devinamatthews/haswell-vmovups Remove alignment restrictions on C in haswell kernel. commit e0d2fa0d835ab49366aeb790363bb2b571d36ed8 Author: Devin Matthews Date: Fri Jul 22 12:56:51 2016 -0500 Relax alignment restrictions for haswell sgemm. commit f9214ced97392861f5a0ea72abfcf6f41faf674c Merge: 413d62ac 08666eaa Author: Field G. Van Zee Date: Fri Jul 22 12:16:39 2016 -0500 Merge pull request #85 from devinamatthews/qopenmp Change -openmp to -fopenmp for icc. commit ee2c139df6ad53c6aec8a67ab23b3b1912e8d259 Author: Devin Matthews Date: Fri Jul 22 12:06:03 2016 -0500 Remove alignment restrictions on C in haswell kernel. commit 08666eaa20d8a31f2f92f944e5bfa7c1558c53e4 Author: Devin Matthews Date: Fri Jul 22 11:07:34 2016 -0500 Change -openmp to -fopenmp for icc. commit 119d0399428905053265f3aca1cc8cc1fde3b363 Author: Devin Matthews Date: Fri Jul 22 10:23:31 2016 -0500 Add 8x24 KNL kernel. commit 1aa77dfc1dc183d16e0b6a1196d9c263f021e83d Merge: 9101a9c8 ec9f5983 Author: praveeng Date: Thu Jul 21 14:22:40 2016 +0530 Merge master code as on 2016_07_21 to amd-staging branch by praveeng Change-Id: Ic7d0a21101358f08147736e7f1884e7409937344 commit b58cda9eba0c1e175460aae109baf792d29ba5bf Merge: 318f063d 413d62ac Author: Devin Matthews Date: Tue Jul 19 14:09:09 2016 -0500 Merge remote-tracking branch 'origin/master' into knl # Conflicts: # frame/base/bli_threading.h # frame/include/blis.h # frame/thread/bli_thread.c commit ec9f59836b32260c29ff1cd24e629c7d8de14992 Merge: 197e182f 763babe4 Author: praveeng Date: Mon Jul 18 12:56:25 2016 +0530 Merge branch 'master' of https://github.com/clMathLibraries/blis-amd commit 197e182fcbf1340fd4a202fac58bea6cfcfa9e2f Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 41fb32711031e7ec86b062aa7f53255d1f5905e2 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit d0dfe5b5372cc7558ee9c4104b29f82eecc7ed61 Merge: 31def12e 413d62ac Author: Field G. Van Zee Date: Thu Jul 14 11:01:06 2016 -0500 Merge branch 'master' into compose commit 9101a9c880e3934f8a63ffc7fe15f5fc1077a73d Author: sthangar Date: Wed Jul 13 16:51:14 2016 +0530 Checked in optimized 1V kernels along with benchmark codes. Also incorporated review comments for 1F kernels Change-Id: I035c0d39e6b0bed28e6e2041242186c49f6ed55b commit 763babe488880b42c86c7fc207aa7665bd0ff9f7 Merge: 357c990b 413d62ac Author: praveeng Date: Wed Jul 13 11:57:19 2016 +0530 Merge remote-tracking branch 'publirepo/master' commit 413d62aca28edabba56605a9f87d5b715831e1db Author: Field G. Van Zee Date: Tue Jul 12 15:02:52 2016 -0500 README update (use official ACM TOMS links). commit dfa431f696db2df4065ea454df268a2e0bc02eac Author: Field G. Van Zee Date: Tue Jul 12 14:21:19 2016 -0500 README update (BLIS2 TOMS article now in-print). commit 357c990bdd7bd5667aac5adf1bab3712973e7414 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 8aee306300adb099b66036f2c2f7f3996433cf49 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 31def12e2629f187e40f93f6bae9e26a6c2660e2 Author: Field G. Van Zee Date: Thu Jun 30 15:19:20 2016 -0500 First phase of control tree redesign. Details: - These changes constitute the first set of changes in preparation to revamping the structure and use of control trees in BLIS. Modifications in this commit don't affect the control tree code yet, but rather lay the groundwork. - Defined wrappers for the following functions, where the the wrappers each take a direction parameter of a new enumerated type (BLIS_BWD or BLIS_FWD), dir_t, and executes the correct underlying function. - bli_acquire_mpart_*() and _vpart_*() - bli_*_determine_kc_[fb]() - bli_thread_get_range_*() and bli_thread_get_range_weighted_*() - Consolidated all 'f' (forwards-moving) and 'b' (backwards-moving) blocked variants for trmm and trsm, and renamed gemm and herk variants accordingly. The direction is now queried via routines such as bli_trmm_direct(), which deterines the direction from the implied side and uplo parameters. For gemm and herk, it is uncondtionally BLIS_FWD. - Defined wrappers to parameter-specific macrokernels for herk, trmm, and trsm, e.g. bli_trmm_xx_ker_var2(), that execute the correct underlying macrokernel based on the implied parameters. The same logic used to choose the dir_t in _direct() functions is used here. - Simplified the function pointer arrays in _int() functions given the consolidation and dir_t querying mentioned above. - Function signature (whitespace) reformatting for various functions. - Removed old code in various 'old' directories. commit 405c9d46344d93c3eab5572b233900b50ca50d68 Author: sthangar Date: Wed Jun 22 12:18:54 2016 +0530 Check-in the fused kernels optimized for Zen Change-Id: I7b2f467b960e7b9a285f06e47be87de122e5fa24 commit 232754feecf29452987666b9f5ebba2619bfd0b0 Author: Field G. Van Zee Date: Tue Jun 21 14:25:39 2016 -0500 Fixed compiler warning in rand[vm], randn[vm]. Details: - Fixed compiler warnings about unused variables related to the disabling of normalization in the structured cases of the rand[vm] and randn[vm] operations. commit a89555d1605574f3685813dcc972b636dd61264d Author: Field G. Van Zee Date: Fri Jun 17 14:08:35 2016 -0500 Added randn[vm] operations, support in testsuite. Details: - Defined a new randomization operation, randn, on vectors and matrices. The randnv and randnm operations randomize each element of the target object with values from a narrow range of values. Presently, those values are all integer powers of two, but they do not need to be powers of two in order to achieve the primary goal, which is to initialize objects that can be operated on with plenty of precision "slack" available to allow computations that avoid roundoff. Using this method of randomization makes it much more likely that testsuite residuals of properly-functioning operations are close to zero, if not exactly zero. - Updated existing randomization operations randv and randm to skip special diagonal handling and normalization for matrices with structure. This is now handled by the testsuite modules by explicitly calling a testsuite function that loads the diagonal (and scales off-diagonal elements). - Added support for randnv and randnm in the testsuite with a new switch in input.general that universally toggles between use of the classic randv/randm, which use real values on the interval [-1,1], and randnv/randnm, which use only values from a narrow range. Currently, the narrow range is: +/-{2^0, 2^-1, 2^-2, 2^-3, 2^-4, 2^-5, 2^-6}, as well as 0.0. - Updated testsuite modules so that a testsutie wrapper function is called instead of directly calling the randomization operations (such as bli_randv() and bli_randm()). This wrapper also takes a bool_t that indicates whether the object's elements should be normalized. (NOTE: As alluded to above, in the test modules of triangular solve operations such as trsv and trsm, we perform the extra step of loading the diagonal.) - Defined a new level-0 operation, invertsc, which inverts a scalar. - Updated the abval2ris and sqrt2ris level-0 macros to avoid an unlikely but possible divide-by-zero. - Updated function signature and prototype formatting in testsuite. commit 318f063dcbd8b594969e401bc99146d24b01066a Author: Devin Matthews Date: Wed Jun 8 17:46:50 2016 -0500 Add new KNL microkernel derived from Haswell. commit 096895c5d538a7f8817603d7cf28c52e99340def Author: Field G. Van Zee Date: Mon Jun 6 13:32:04 2016 -0500 Reorganized code, APIs related to multithreading. Details: - Reorganized code and renamed files defining APIs related to multithreading. All code that is not specific to a particular operation is now located in a new directory: frame/thread. Code is now organized, roughly, by the namespace to which it belongs (see below). - Consolidated all operation-specific *_thrinfo_t object types into a single thrinfo_t object type. Operation-specific level-3 *_thrinfo_t APIs were also consolidated, leaving bli_l3_thrinfo_*() and bli_packm_thrinfo_*() functions (aside from a few general purpose bli_thrinfo_*() functions). - Renamed thread_comm_t object type to thrcomm_t. - Renamed many of the routines and functions (and macros) for multithreading. We now have the following API namespaces: - bli_thrinfo_*(): functions related to thrinfo_t objects - bli_thrcomm_*(): functions related to thrcomm_t objects. - bli_thread_*(): general-purpose functions, such as initialization, finalization, and computing ranges. (For now, some macros, such as bli_thread_[io]broadcast() and bli_thread_[io]barrier() use the bli_thread_ namespace prefix, even though bli_thrinfo_ may be more appropriate.) - Renamed thread-related macros so that they use a bli_ prefix. - Renamed control tree-related macros so that they use a bli_ prefix (to be consistent with the thread-related macros that were also renamed). - Removed #undef BLIS_SIMD_ALIGN_SIZE from dunnington's bli_kernel.h. This #undef was a temporary fix to some macro defaults which were being applied in the wrong order, which was recently fixed. commit 232530e88ff99f37abcae5b6fb5319a9a375a45f Merge: 4bcabd1b eef37f8b Author: Tyler Michael Smith Date: Wed Jun 1 15:14:10 2016 -0500 Merge commit 'refs/pull/81/head' of https://github.com/flame/blis Conflicts: frame/base/bli_threading_pthreads.c frame/base/bli_threading_pthreads.h commit 4bcabd1bf60688c38cf562459fc5e8be8b831756 Author: Tyler Michael Smith Date: Wed Jun 1 13:27:28 2016 -0500 Use spin locks instead of pthread barriers commit eef37f8b4d81845a6ba4bf25586d32b50c3e8a68 Author: Jeff Hammond Date: Sun May 29 22:28:13 2016 -0700 use GCC intrinsic instead of pthread_mutex for atomic increment and fetch commit 9dcd6f05c4c3ff2ce7cd87a9951a96ebef22681e Author: Field G. Van Zee Date: Tue May 24 13:15:32 2016 -0500 Implemented developer-configurable malloc()/free(). Details: - Replaced all instances of bli_malloc() and bli_free() with one of: - bli_malloc_pool()/bli_free_pool() - bli_malloc_user()/bli_free_user() - bli_malloc_intl()/bli_free_intl() each of which can be configured to call malloc()/free() substitutes, so long as the substitute functions have the same function type signatures as malloc() and free() defined by C's stdlib.h. The _pool() function is called when allocating blocks for the memory pools (used for packing buffers, primarily), the _user() function is called when obj_t's are created (via bli_obj_create() and friends), and the _intl() function is called for internal use by BLIS, such as when creating control tree nodes or temporary buffers for manipulating internal data structures. Substitutes for any of the three types of bli_malloc() may be specified by #defining the following pairs of cpp macros in bli_kernel.h: - BLIS_MALLOC_POOL/BLIS_FREE_POOL - BLIS_MALLOC_USER/BLIS_FREE_USER - BLIS_MALLOC_INTL/BLIS_FREE_INTL to be the name of the substitute functions. (Obviously, the object code that contains these functions must be provided at link-time.) These macros default to malloc() and free(). Subsitute functions are also automatically prototyped by BLIS (in bli_malloc_prototypes.h). - Removed definitions for bli_malloc() and bli_free(). - Note that bli_malloc_pool() and bli_malloc_user() are now defined in terms of a new function, bli_malloc_align(), which aligns memory to an arbitrary (power of two) alignment boundary, but does so manually, whereas before alignment was performed behind the scenes by posix_memalign(). Currently, bli_malloc_intl() is defined in terms of bli_malloc_noalign(), which serves as a simple wrapper to the designated function that is passed in (e.g. BLIS_MALLOC_INTL). Similarly, there are bli_free_align() and bli_free_noalign(), which are used in concert with their bli_malloc_*() counterparts. commit 9dd440109a9d964f5cd286e9f83c487ad703e1e4 Author: Jeff Hammond Date: Sat May 21 15:21:58 2016 -0700 fix 404 link to BuildSystem Google Code is dead. Long live GitHub! commit d309f20b7376a68efa3b864ad790c2021c071655 Author: Field G. Van Zee Date: Wed May 18 15:13:53 2016 -0500 Added alignment switch to testsuite. Details: - Added a new input parameter to input.general that globally toggles whether testsuite tests are performed on objects whose buffers and leading dimensions have been aligned, and changed the implementation of libblis_test_mobj_create() to employ alignment (or not) regardless of whether row, column, or general storage is being tested. - Updated configure script's "--help" text to indicate default behavior for internal integer type size and BLAS/CBLAS integer type size options. commit 32db0adc218ea4ae370164dbe8d23b41cd3526d3 Author: Field G. Van Zee Date: Tue May 17 15:20:16 2016 -0500 Generate prototypes for user-defined packm kernels. Details: - Created template prototypes for packm kernels (in bli_l1m_ker.h), and then redefined reference packm kernels' prototyping headers in terms of this template, as is already done for level-1v, -1f, and -3 kernels. - Automatically generate prototypes for user-defined packm kernels in bli_kernel_prototypes.h (using the new template prototypes in bli_l1m_ker.h). - Defined packm kernel function types in bli_l1m_ft.h, including for packm kernels specific to induced methods, which are now used in bli_packm_cxk.c and friends rather than using a locally-defined function type. - In bli_packm_cxk.c, extended function pointer for packm kernels array from out to index 31 (from previous maximum of 17). This allows us to store the unrolled 30xk kernel in the array for use (on knc, for example). Note: This should have been done a long time ago. commit e3bd5ca64ae7c190ba689396c0de687b829a11fe Author: Devin Matthews Date: Thu May 12 20:54:13 2016 -0500 Fix SIMD definitions in KNL config, and a couple of fixes to C update. commit 4fe02e3d497995d94d34d3fcf5af895084cfc8b9 Author: Devin Matthews Date: Thu May 12 20:53:58 2016 -0500 Move bli_kernel.h before bli_threading.h in order of inclusion in blis.h. commit 4bcf1b35abea3f3dfc8f2fe462dcf155cf199e55 Author: Field G. Van Zee Date: Wed May 11 16:09:49 2016 -0500 Fixed bli_get_range_*() bugs in trsm variants. Details: - Fixed incorrect calls to bli_get_range_*() from within trsm blocked variants 1f, 2b, and 2f. The bug somehow went undetected since the big commit (537a1f4), and, strangely, did not manifest via the BLIS testsuite. The bug finally came to our attention when running thei libflame test suite while linking to BLIS. Thanks to Kiran Varaganti for submitting the initial report that led to this bug. commit 9cfa33023f123a6c17e987f72fba174ce073f0b6 Author: Field G. Van Zee Date: Wed May 11 16:02:30 2016 -0500 Minor updates to bli_f2c.h. Details: - Added #undef guards to certain #define statements in bli_f2c.h, and renamed the file guard to BLIS_F2C_H. This helps when #including "blis.h" from an application or library that already #includes an "f2c.h" header. commit a09a2e23eacf5328858c8318bb637c5ff3b71d08 Merge: 4dcd37eb 7c604e1c Author: Tyler Michael Smith Date: Wed May 11 10:47:11 2016 -0500 Merge pull request #76 from devinamatthews/move_simd_defs Move default SIMD-related definitions to bli_kernel_macro_defs.h commit 4dcd37eb1b12a6e08cc13df7b61391ef8363f5d8 Author: Tyler Smith Date: Tue May 10 16:28:59 2016 -0500 fixing knc simd align size commit 619dee0daec3474b4e5a55df90a61aabcae194f2 Merge: b790b3d9 7c604e1c Author: Devin Matthews Date: Tue May 10 12:13:24 2016 -0500 Merge branch 'move_simd_defs' into knl commit 7c604e1cbc1609b6e12d3ee973c08b7af5035be4 Author: Devin Matthews Date: Tue May 10 12:11:55 2016 -0500 Move default SIMD-related definitions to bli_kernel_macro_defs.h. Otherwise, configurations which customize these fail as these are now defined in bli_kernel.h. commit b790b3d9e1820f3b691676de48c291cae083452d Merge: 4f8c05c9 a7be2d28 Author: Devin Matthews Date: Tue May 10 11:49:47 2016 -0500 Merge branch 'master' into knl commit a7be2d28e8930b154d0da1d6929b54a96e210af6 Merge: 97b512ef 4b1e55ed Author: Field G. Van Zee Date: Tue May 10 11:48:51 2016 -0500 Merge pull request #74 from devinamatthews/fix_common_symbols Default-initialize all extern global variables to avoid generating common symbols. commit 4b1e55edbfe0e1cb2e7b9428424903497cb7a841 Author: Devin Matthews Date: Tue May 10 10:08:47 2016 -0500 Default-initialize all extern global variables to avoid generating common symbols. Fixes #73. commit 97b512ef62c7e25c97ed5e9eca81cd7015b2ac91 Author: Field G. Van Zee Date: Fri May 6 10:24:30 2016 -0500 Include headers from cblas.h to pull in f77_int. Details: - Added #include statements for certain key BLIS headers so that the definition of f77_int is pulled in when a user compiles application code with only #include "cblas.h" (and no other BLIS header). This is necessary since f77_int is now used within the cblas API. commit c3a4d39d03665135f1616588b5ef7c3e9ef5688d Author: Field G. Van Zee Date: Wed May 4 17:22:56 2016 -0500 Updates to haswell gemm micro-kernels. Details: - Added two new sets of [sd]gemm micro-kernels for haswell architectures, one that is 4x24/4x12 (s and d) and one that is 6x16/6x8. - Changed the haswell configuration to use the 6x16/6x8 micro-kernels by default. - Updated various Makefiles, in test, test/3m4m, and testsuite. commit 0b01d355ae861754ae2da6c9a545474af010f02e Author: Field G. Van Zee Date: Wed Apr 27 15:21:10 2016 -0500 Miscellaneous cleanups, fixes to recent commits. Details: - Fixed a typo in bli_l1f_ref.h, introduced into bbb8569, that only manifested when non-reference level-1f kernels were used. - Added an #undef BLIS_SIMD_ALIGN_SIZE to bli_kernel.h of dunnington configuration to prevent a compile-time warning until I can figure out the proper permanent fix. - Moved frame/1f/kernels/bli_dotxaxpyf_ref_var1.c out of the compilation path (into 'other' directory). _ref_var2 is used by default, which is the variant that is built on axpyf and dotxf instead of dotaxpyv. - Removed section of frame/include/bli_config_macro_defs.h pertaining to mixed datatype support. commit ed7326c836f427e2f8420b015220ce293207b10c Author: Field G. Van Zee Date: Wed Apr 27 14:57:40 2016 -0500 Added 'restrict' to l1v/l1f code in 'kernels' dir. Details: - Added 'restrict' keyword to existing kernel definitions in 'kernels' directory. These changes were meant for inclusion in bbb8569. commit bbb8569b2a08c3bcd631d5a05eb389d01d94ac07 Author: Field G. Van Zee Date: Wed Apr 27 14:13:46 2016 -0500 Use 'restrict' in all kernel APIs; wspace changes. Details: - Updated level-1v, level-1f kernel function types (bli_l1?_ft.h) and generic kernel prototypes (bli_l1?_ker.h) to use 'restrict' for all numerical operand pointers (ie: all pointers except the cntx_t). - Updated level-1f reference kernel definitions to use 'restrict' for all numerical operand pointers. (Level-1v reference kernel definitions were already updated in bdbda6e.) - Rewrote the level-1v and level-1f reference kernel prototypes in bli_l1v_ref.h and bli_l1f_ref.h, respectively, to simply #include bli_l1v_ker.h and bli_l1f_ker.h with redefined function base names (as was already being done for the level-3 micro-kernel prototypes in bli_l3_ref.h), rather than duplicate the signatures from the _ker.h files. - Added definitions to frame/include/bli_kernel_prototypes.h for axpbyv and xpbyv, which were probably meant for inclusion in bdbda6e. - Converted a number of instances of four spaces, as introduced in bdbda6e, to tabs. commit 4ea419c72c789825e1f93a1eee88219bbf873930 Merge: f1e9be2a bdbda6e6 Author: Field G. Van Zee Date: Tue Apr 26 12:50:45 2016 -0500 Merge pull request #70 from devinamatthews/daxpby Give the level1v operations some love commit bdbda6e6acc682ab1b6ca680edebd09ae12a832c Author: Devin Matthews Date: Mon Apr 25 11:05:57 2016 -0500 Give the level1v operations some love: - Add missing axpby and xpby operations (plus test cases). - Add special case for scal2v with alpha=1. - Add restrict qualifiers. - Add special-case algorithms for incx=incy=1. commit f1e9be2aba1a057eedb947bbae96848597777408 Author: Field G. Van Zee Date: Fri Apr 22 15:34:02 2016 -0500 Minor tweak to test/Makefile. Details: - Just committing a minor change to test/Makefile that has been lingering in my local working copy for longer than I can remember. commit aa0bceec277938328dabeb744680623f24fb0b61 Merge: 4136553f e2784b4c Author: Field G. Van Zee Date: Fri Apr 22 12:01:31 2016 -0500 Merge branch 'master' of github.com:flame/blis commit 4136553f0d0661a668dfdb9edcd7ce1c5773dde7 Author: Field G. Van Zee Date: Fri Apr 22 11:53:53 2016 -0500 Clear level-3 cntx_t's via memset() before use. Details: - In all level-3 operations' _cntx_init() functions, replaced calls to bli_cntx_obj_init() with calls to bli_cntx_obj_clear(), and in all level-3 operations' _cntx_finalize() functions, removed calls to bli_cntx_obj_finalize(), leaving those function definitions empty. - Changed the definition of bli_cntx_obj_clear() so that the clearing occurs via a single call to memset(). commit 4f8c05c9e2ef4cbb82b35a3ebf1f0a0ac665830e Author: Devin Matthews Date: Thu Apr 21 10:00:59 2016 -0500 Rearrange KNL dgemm kernel again to streamline usage of ymm register. sgemm and dgemm now both working with Intel SDE. commit e2784b4c921f706e756df3e146e20a4cb63f53e3 Merge: dd0ab1d9 a9b6c3ab Author: Field G. Van Zee Date: Wed Apr 20 18:34:09 2016 -0500 Merge pull request #67 from devinamatthews/cblas-f77-int Change CBLAS integer type to f77_int commit a9b6c3abda6222a8b240361643932e83cf726c4f Merge: e4c54c81 dd0ab1d9 Author: Devin Matthews Date: Wed Apr 20 16:00:10 2016 -0500 Merge remote-tracking branch 'origin/master' into cblas-f77-int # Conflicts: # config/haswell/bli_config.h commit e4c54c81463c2a19c9bb6b1f0f1be3fa9d018a45 Author: Devin Matthews Date: Wed Apr 20 15:56:46 2016 -0500 Change integer type in CBLAS function signatures to f77_int, and add proper const-correctness to BLAS layer. commit dd0ab1d93f33abca6af9edd7b8e52da62dcfa5b1 Author: Field G. Van Zee Date: Wed Apr 20 14:38:23 2016 -0500 Converted some bli_cntx query functions to macros. Details: - Commented out several datatype-aware query functions (those ending in _dt) from bli_cntx.c, as well as their prototypes in bli_cntx.h, and added equivalent cpp query macros to bli_cntx.h. - Added 'bli_config.h' to .gitignore. commit 7193230f7d35edbd1d2f77842a613971f1603463 Author: Devin Matthews Date: Wed Apr 20 09:37:30 2016 -0500 Work around missing VPMULLQ on KNL. commit a30ccbc4c6a6e6460e78af6b5c530ee0d06f98fb Merge: eb2f18e4 0e1a9821 Author: Field G. Van Zee Date: Tue Apr 19 15:04:33 2016 -0500 Merge pull request #66 from devinamatthews/blas-configure Add configure options and generate bli_config.h automatically. commit bd44cf13e886069bc66c10ac0db178be96629a0d Author: Devin Matthews Date: Tue Apr 19 13:43:04 2016 -0500 Fix copy-paste errors in KNL kernels. commit eb2f18e4844d985715df20798f50f9cc12e3b5ad Author: Field G. Van Zee Date: Tue Apr 19 12:50:32 2016 -0500 More compile-time fixes to bgq gemm ukernel code. commit 0e1a9821d860f6c1d818baf4c48d21a23726c132 Author: Devin Matthews Date: Tue Apr 19 11:44:37 2016 -0500 Add configure options and generate bli_config.h automatically. Options to configure have been added for: - Setting the internal BLIS and BLAS/CBLAS integer sizes. - Enabling and disabling the BLAS and CBLAS layers. Additionally, configure options which require defining macros (the above plus the threading model), write their macros to the automatically-generated bli_config.h file in the top-level build directory. The old bli_config.h files in the config dirs were removed, and any kernel-related macros (SIMD size and alignment etc.) were moved to bli_kernel.h. The Makefiles were also modified to find the new bli_config.h file. Lastly, support for OMP in clang has been added (closes #56). commit a11eec05928ddc5c43fa5dbcd35f2edd24ff35a1 Author: Devin Matthews Date: Mon Apr 18 13:13:36 2016 -0500 Add sgemm ukernels for KNL. vpmullq is not implemented on KNL -- needs workaround. commit ff84469a4575f1ef8a0010046fde52240a312cae Author: Field G. Van Zee Date: Mon Apr 18 12:29:09 2016 -0500 Applied various compilation fixes to bgq kernels. commit c38e0dab05b2dc36672eab96e1248fb7fb2d785b Merge: bd5e2296 cbcd0b73 Author: Devin Matthews Date: Mon Apr 18 10:21:35 2016 -0500 Merge remote-tracking branch 'origin/master' into knl commit bd5e2296e98e042c31f1e8ece2c1ca8e4bdc2d4c Merge: 4745def0 49f85177 Author: Devin Matthews Date: Mon Apr 18 10:15:22 2016 -0500 Merge remote-tracking branch 'origin/knl' into knl commit 4745def0c87377ae83ad73ac514d7de08a96b2ac Author: Devin Matthews Date: Mon Apr 18 10:15:05 2016 -0500 Add 64-bit offset vector so we can use vgatherqpd. commit 49f85177f886f38889b60503a4e12fa7f04be1fd Author: Devin Matthews Date: Mon Apr 18 10:14:11 2016 -0500 KNL ukernel compiles with gcc. commit cbcd0b739dc54bd14fbb46aeda267c26725cd70f Author: Tyler Michael Smith Date: Mon Apr 18 03:12:57 2016 -0500 Changing ifdef for OSX pthread barriers commit 58b2c3cf040134d1be913c585a3c6905629116c0 Author: Devin Matthews Date: Sat Apr 16 16:12:24 2016 -0500 Rewrite of KNL kernel in GNU extended asm syntax. commit dd62080cea78f3a23616200d6640e52c102b2bb9 Author: Field G. Van Zee Date: Fri Apr 15 11:15:41 2016 -0500 Compile-time fix to bgq l1f kernels. Details: - Fixed an old reference to bli_daxpyf_fusefac, which no longer exists, by replacing it with the axpyf fusing factor (8), and cleaned up the relevant section of config/bgq/bli_kernel.h. - Removed most of the details of the level-3 kernels from the template kernel code in config/template/kernels/3 and replaced it with a reference to the relevant kernel wiki maintained on the BLIS github website. commit d5a915dd8d7a6ead42a68772e4420eb3647e6f1a Merge: 4320b725 41694675 Author: Field G. Van Zee Date: Thu Apr 14 12:56:36 2016 -0500 Merge branch 'master' of github.com:flame/blis commit 4320b725a1f8fd34101470b6cf52ad504a79c517 Author: Field G. Van Zee Date: Thu Apr 14 12:51:29 2016 -0500 Use kernel CFLAGS on "ukernels" directories. Details: - Updated the top-level Makefile so that the CFLAGS variable designated for kernel source code is applied not only to source code in directories named "kernels" but source code in any directory that contains the substring "kernels", such as "ukernels". - Formally disabled some code in gen-make-frag.sh script that was already effectively disabled. The code was related to handling "noopt" and "kernel" directories, which is now handled independently within the top-level Makefile without needing to place these source files into a spearate makefile variable. commit 41694675e4cb56e2e0323c7a7db48e0819606a31 Author: Tyler Smith Date: Wed Apr 13 15:51:08 2016 -0500 pthreads bugfixes Getting pthreads to work on my Mac Implemented a pthread barrier when _POSIX_BARRIER isn't defined Now spawn n-1 threads instead of n threads so that master thread isn't just spinning the whole time Add -lpthread instead of -pthread to LDFLAGS (for clang) commit f756dbfa0d542cbc497724981520c83abf049c4b Author: Field G. Van Zee Date: Wed Apr 13 11:25:33 2016 -0500 Removed stale #include from bgq configuration. Details: - Removed an old #include statement ("bli_gemm_8x8.h") from the bli_kernel.h file in the bgq configuration. It turns out this file was no longer needed even prior to 537a1f4. commit 0bd4169ea75f690714e7d2912229932a75d8a7e2 Author: Field G. Van Zee Date: Mon Apr 11 18:08:32 2016 -0500 Fixed context-broken dunnington/penryn kernels. Details: - Added missing context parameters to several instances where simpler kernels, or reference kernels, are called instead of executing the main body code contained in the kernel function in question. - Renamed axpyv and dotv kernel files to use "opt" instead of "int" substring, for consistency with level-1f kernels. commit 7912af5db45b7372d19a9a3dfeb82df302a05628 Author: Field G. Van Zee Date: Mon Apr 11 17:32:13 2016 -0500 CHANGELOG update (0.2.0) commit 898614a555ea0aa7de4ca07bb3cb8f5708b6a002 Author: Field G. Van Zee Date: Mon Apr 11 17:32:09 2016 -0500 Version file update (0.2.0) commit 537a1f4f85ce1aa008901857cb3182e6b4546d7f Author: Field G. Van Zee Date: Mon Apr 11 17:21:28 2016 -0500 Implemented runtime contexts and reorganized code. Details: - Retrofitted a new data structure, known as a context, into virtually all internal APIs for computational operations in BLIS. The structure is now present within the type-aware APIs, as well as many supporting utility functions that require information stored in the context. User- level object APIs were unaffected and continue to be "context-free," however, these APIs were duplicated/mirrored so that "context-aware" APIs now also exist, differentiated with an "_ex" suffix (for "expert"). These new context-aware object APIs (along with the lower-level, type- aware, BLAS-like APIs) contain the the address of a context as a last parameter, after all other operands. Contexts, or specifically, cntx_t object pointers, are passed all the way down the function stack into the kernels and allow the code at any level to query information about the runtime, such as kernel addresses and blocksizes, in a thread- friendly manner--that is, one that allows thread-safety, even if the original source of the information stored in the context changes at run-time; see next bullet for more on this "original source" of info). (Special thanks go to Lee Killough for suggesting the use of this kind of data structure in discussions that transpired during the early planning stages of BLIS, and also for suggesting such a perfectly appropriate name.) - Added a new API, in frame/base/bli_gks.c, to define a "global kernel structure" (gks). This data structure and API will allow the caller to initialize a context with the kernel addresses, blocksizes, and other information associated with the currently active kernel configuration. The currently active kernel configuration within the gks cannot be changed (for now), and is initialized with the traditional cpp macros that define kernel function names, blocksizes, and the like. However, in the future, the gks API will be expanded to allow runtime management of kernels and runtime parameters. The most obvious application of this new infrastructure is the runtime detection of hardware (and the implied selection of appropriate kernels). With contexts in place, kernels may even be "hot swapped" at runtime within the gks. Once execution enters a level-3 _front() function, the memory allocator will be reinitialized on-the-fly, if necessary, to accommodate the new kernels' blocksizes. If another application thread is executing with another (previously loaded) kernel, it will finish in a deterministic fashion because its kernel information was loaded into its context before computation began, and also because the blocks it checked out from the internal memory pools will be unaffected by the newer threads' reinitialization of the allocator. - Reorganized and streamlined the 'ind' directory, which contains much of the code enabling use of induced methods for complex domain matrix multiplication; deprecated bli_bsv_query.c and bli_ukr_query.c, as those APIs' functionality is now mostly subsumed within the global kernel structure. - Updated bli_pool.c to define a new function, bli_pool_reinit_if(), that will reinitialize a memory pool if the necessary pool block size has increased. - Updated bli_mem.c to use bli_pool_reinit_if() instead of bli_pool_reinit() in the definition of bli_mem_pool_init(), and placed usage of contexts where appropriate to communicate cache and register blocksizes to bli_mem_compute_pool_block_sizes(). - Simplified control trees now that much of the information resides in the context and/or the global kernel structure: - Removed blocksize object pointers (blksz_t*) fields from all control tree node definitions and replaced them with blocksize id (bszid_t) values instead, which may be passed into a context query routine in order to extract the corresponding blocksize from the given context. - Removed micro-kernel function pointers (func_t*) fields from all control tree node definitions. Now, any code that needs these function pointers can query them from the local context, as identified by a level-3 micro-kernel id (l3ukr_t), level-1f kernel id, (l1fkr_t), or level-1v kernel id (l1vkr_t). - Removed blksz_t object creation and initialization, as well as kernel function object creation and initialization, from all operation- specific control tree initialization files (bli_*_cntl.c), since this information will now live in the gks and, secondarily, in the context. - Removed blocksize multiples from blksz_t objects. Now, we track blocksize multiples for each blocksize id (bszid_t) in the context object. - Removed the bool_t's that were required when a func_t was initialized. These bools are meant to allow one to track the micro-kernel's storage preferences (by rows or columns). This preference is now tracked separately within the gks and contexts. - Merged and reorganized many separate-but-related functions into single files. This reorganization affects frame/0, 1, 1d, 1m, 1f, 2, 3, and util directories, but has the most obvious effect of allowing BLIS to compile noticeably faster. - Reorganized execution paths for level-1v, -1d, -1m, and -2 operations in an attempt to reduce overhead for memory-bound operations. This includes removal of default use of object-based variants for level-2 operations. Now, by default, level-2 operations will directly call a low-level (non-object based) loop over a level-1v or -1f kernel. - Converted many common query functions in blk_blksz.c (renamed from bli_blocksize.c) and bli_func.c into cpp macros, now defined in their respective header files. - Defined bli_mbool.c API to create and query "multi-bools", or heterogeneous bool_t's (one for each floating-point datatype), in the same spirit as blksz_t and func_t. - Introduced two key parameters of the hardware: BLIS_SIMD_NUM_REGISTERS and BLIS_SIMD_SIZE. These values are needed in order to compute a third new parameter, which may be set indirectly via the aforementioned macros or directly: BLIS_STACK_BUF_MAX_SIZE. This value is used to statically allocate memory in macro-kernels and the induced methods' virtual kernels to be used as temporary space to hold a single micro-tile. These values are now output by the testsuite. The default value of BLIS_STACK_BUF_MAX_SIZE is computed as "2 * BLIS_SIMD_NUM_REGISTERS * BLIS_SIMD_SIZE". - Cleaned up top-level 'kernels' directory (for example, renaming the embarrassingly misleading "avx" and "avx2" directories to "sandybridge" and "haswell," respectively, and gave more consistent and meaningful names to many kernel files (as well as updating their interfaces to conform to the new context-aware kernel APIs). - Updated the testsuite to query blocksizes from a locally-initialized context for test modules that need those values: axpyf, dotxf, dotxaxpyf, gemm_ukr, gemmtrsm_ukr, and trsm_ukr. - Reformatted many function signatures into a standard format that will more easily facilitate future API-wide changes. - Updated many "mxn" level-0 macros (ie: those used to inline double loops for level-1m-like operations on small matrices) in frame/include/level0 to use more obscure local variable names in an effort to avoid variable shaddowing. (Thanks to Devin Matthews for pointing these gcc warnings, which are only output using -Wshadow.) - Added a conj argument to setm, so that its interface now mirrors that of scalm. The semantic meaning of the conj argument is to optionally allow implicit conjugation of the scalar prior to being populated into the object. - Deprecated all type-aware mixed domain and mixed precision APIs. Note that this does not preclude supporting mixed types via the object APIs, where it produces absolutely zero API code bloat. commit dd856c2cb75a2221a503a73dde27790c34b91570 Author: Devin Matthews Date: Mon Apr 11 10:39:18 2016 -0500 Translated MIC kernel to KNL and cleaned up a bit. Only real change is lack of swizzle modifiers for FMA instructions (used bcast from memory instead). commit 7f27431d3fffdda99c282ec412731d0a90cb32a7 Author: Devin Matthews Date: Fri Apr 8 10:04:39 2016 -0500 Copy mic kernel to knl for transliteration. commit f8f02f0334ac020021e15a415bcd33aeea01deb4 Merge: 32c92d94 d1f8e5d9 Author: Devin Matthews Date: Wed Apr 6 11:37:05 2016 -0500 Merge branch 'master' into const_correctness commit 32c92d945c55708da0eb63be1771f8c5430e3910 Merge: 62914ccb 20af937b Author: Devin Matthews Date: Wed Apr 6 11:36:02 2016 -0500 Merge branch 'master' into const_correctness commit d1f8e5d9b2ecd054ed103f4d642d748db2d4f173 Merge: 20af937b c11d28ee Author: Field G. Van Zee Date: Tue Apr 5 12:21:27 2016 -0500 Merge pull request #60 from esauvage/master sgemm µkernel for bulldozer : bug correction for k%4 != 0 commit c11d28eed89d65494bc4019f04d046520866c0ff Author: Etienne Sauvage Date: Sat Apr 2 21:15:48 2016 +0200 cgemm µkernel for bulldozer : bug correction for k%4 != 0 commit 20af937b57f82bb3acb09418d5c0206e1b24f2c7 Merge: 36c3abb0 fc61a114 Author: Field G. Van Zee Date: Thu Mar 31 14:37:30 2016 -0500 Merge pull request #59 from devinamatthews/fix_testsuite_makefile Fix testsuite makefile commit fc61a1143edeba4946d4b9915f1775bb08e643fc Author: Devin Matthews Date: Thu Mar 31 10:53:01 2016 -0500 Fix formatting in configure. commit 26379b14de630e3a6c6eef5dfe87ff001558a8a6 Author: Devin Matthews Date: Thu Mar 31 10:45:48 2016 -0500 Adjust paths in common.mk to support building from testsuite dir. commit 36c3abb05fecb02d4a9ab13b2b69d133adf34583 Merge: 64b41fa5 917ce754 Author: Field G. Van Zee Date: Thu Mar 31 10:26:17 2016 -0500 Merge pull request #58 from esauvage/master cgemm & zgemm micro-kernels for FMA4 instruction set (bulldozer confi… commit 356d854fc9e34642cc46e0e02a8ceb56114878af Author: Devin Matthews Date: Wed Mar 30 16:33:15 2016 -0500 Make symlink to common.mk in build directory. commit edbb8470044f82ef959583ee09613a5a985292b5 Author: Devin Matthews Date: Wed Mar 30 16:27:11 2016 -0500 Refactor out some definitions which moved from make_defs.mk to Makefile for use in testsuite Makefile. commit 917ce75482a543fef46553efff6c246939761e59 Author: Etienne Sauvage Date: Wed Mar 30 22:03:09 2016 +0200 cgemm & zgemm micro-kernels for FMA4 instruction set (bulldozer configuration), based on x86_64/avx micro-kernel commit 62914ccbcdb3c594f065dcfa65bd7e7b95c79283 Merge: bbf704bf 64b41fa5 Author: Devin Matthews Date: Tue Mar 29 15:24:25 2016 -0500 Merge branch 'master' into const_correctness commit 64b41fa554dff44b2f9ad48901b67c63836407a8 Merge: 1b09e343 0171ad58 Author: Field G. Van Zee Date: Tue Mar 29 15:19:41 2016 -0500 Merge pull request #54 from devinamatthews/more_config_opts More config opts commit 1b09e343dfe5b48b4842e2cb96f41c8cc249bad0 Author: Field G. Van Zee Date: Tue Mar 29 12:55:28 2016 -0500 Updated gcc version from 4.8 to 4.9 in .travis.yml. commit 0171ad58997b3a5a9b76301511dbe0751fffc940 Author: Devin Matthews Date: Mon Mar 28 13:55:06 2016 -0500 Add icc and clang support for Intel architectures, fixes #47. 2bd036f fixes #49 BTW. commit 3090fff64cc87ff2519a09f38e6b8699cf3cba11 Merge: 8624e365 4ca5d5b1 Author: Field G. Van Zee Date: Mon Mar 28 12:36:25 2016 -0500 Merge pull request #44 from esauvage/master sgemm micro-kernel for FMA4 instruction set commit e6e566426ac3ded7ef87cd8ff9be98accfdc4acc Merge: 469429ec 8624e365 Author: Devin Matthews Date: Sat Mar 26 14:10:15 2016 -0500 Merge branch 'master' into more_config_opts commit 8624e36543160739d954c4dbcc5a5594458f3a12 Merge: a315833f 2bd036f1 Author: Field G. Van Zee Date: Sat Mar 26 13:56:28 2016 -0500 Merge pull request #50 from devinamatthews/fix_noopt_avx Fix configuration issue where instruction set flags are not specified for debug builds. commit 469429ec34e5b1a172ce35596f9c7afdaacac131 Author: Devin Matthews Date: Fri Mar 25 20:45:41 2016 -0500 Fix LD_FLAGS -> LDFLAGS. commit 8442d65c9ead0376fc5f2dfad62fd4862ab9b2b3 Author: Devin Matthews Date: Fri Mar 25 20:06:48 2016 -0500 Replace -march=native with specific architecture flags to support cross-compiling, and add icc support for Intel architectures. commit 76099f20be1b49ac960f7e3c5a8296bbf4e1782d Author: Devin Matthews Date: Fri Mar 25 17:22:58 2016 -0500 Add threading option to configure. commit ad43eab4c7899d56d8d7caa6e2d92bc0581ea5a5 Merge: 9452bdb3 2bd036f1 Author: Devin Matthews Date: Fri Mar 25 15:00:02 2016 -0500 Merge branch 'fix_noopt_avx' into more_config_opts commit 9452bdb3afbf2d7f898134a091d7790817e7be9c Author: Devin Matthews Date: Fri Mar 25 14:59:50 2016 -0500 Add options for verbose make output and static/shared linking to configure. commit 2bd036f1f9ce1ee0864365557f66d9415dd42de3 Author: Devin Matthews Date: Fri Mar 25 12:16:49 2016 -0500 Fix configuration issue where instruction set flags are not specified for debug builds. commit bbf704bf7501411964a63a68f1af541f612cf92d Author: Devin Matthews Date: Fri Mar 25 09:55:35 2016 -0500 Add missing const to bli_read_nway_from_env. commit a315833f067944fb0bc14cf60f0c7dcb5dc897b6 Merge: 1d1a426d af92773f Author: Field G. Van Zee Date: Thu Mar 24 12:30:21 2016 -0500 Merge pull request #48 from figual/master Updated and improved ARMv8 micro-kernels. commit af92773f4f85a2441fe0c6e3a52c31b07253d08e Author: figual Date: Wed Mar 23 22:07:02 2016 +0100 Updated and improved ARMv8 micro-kernels. commit a4d7729776d17d9bdf2341eacd70b9770b9ba8d2 Author: Devin Matthews Date: Mon Mar 21 09:55:21 2016 -0500 Set default value for debug_type variable. commit 0e2447fa55d8c5fa2b1fc4150073512495c5f9eb Author: Devin Matthews Date: Thu Mar 17 16:32:05 2016 -0500 Add const correctness to auxinfo_t struct (microkernels need update theoretically). commit 1d1a426d18ec03754021456862a1f4d1dfec1fbf Merge: 5a978fff d226dfa0 Author: Field G. Van Zee Date: Mon Mar 7 15:17:53 2016 -0600 Merge pull request #46 from devinamatthews/new-config-opts Add several changes to the build system. commit d226dfa05190eb477b33563b1edccf8603973336 Author: Devin Matthews Date: Sat Mar 5 16:18:14 2016 -0600 Add several changes to the build system. 1) Add -- options. 2) Add -d/--enable-debug option to enable debugging symbols with and without optimization. 3) Allow user to specify CC at configure time, and determine vendor (gcc/icc/etc.). For now configurations enforce a particular vendor. 4) Add make V=[0,1] option to control build verbosity. commit 5a978fffdb8f09a81c89541d541d4a6830cd70a4 Merge: adb2b4e0 63e26423 Author: Field G. Van Zee Date: Fri Mar 4 17:26:58 2016 -0600 Merge pull request #45 from devinamatthews/high_prec_timers Use clock_gettime(CLOCK_MONOTONIC) and mach_absolute_time instead of gettimeofday commit 63e264239053b913164a849dd8a45829087eaddc Author: Devin Matthews Date: Fri Mar 4 13:17:50 2016 -0600 Make sure that -lrt is linked on Linux. commit 44fddd48dc1708a956803d1948f04429ec0d8700 Author: Devin Matthews Date: Fri Mar 4 12:36:38 2016 -0600 Add missing \. commit 7cabd2131f953de23e7015d760b0ddfda51b1251 Author: Devin Matthews Date: Thu Mar 3 11:43:07 2016 -0600 Use clock_gettime(CLOCK_MONOTONIC) and mach_absolute_time instead of gettimeofday. commit adb2b4e096c78e8b2f85fd372cf0d5eb04af5be8 Author: Tyler Smith Date: Wed Mar 2 14:48:12 2016 -0600 Fixing guard for non implemented partitioning through packed matrices commit 4ca5d5b1fd6f2e4a8b2e139c5405475239581e51 Author: Etienne Sauvage Date: Tue Mar 1 21:33:01 2016 +0100 sgemm micro-kernel for FMA4 instruction set (bulldozer configuration), based on x86_64/avx micro-kernel commit 627d59b5ba06866b26f46e4434a0435b600925e3 Author: Etienne Sauvage Date: Mon Feb 29 21:53:12 2016 +0100 symbolic link for bulldozer configuration to kernels commit 2dc5c0ae038ed175fab85751803ada05734d1ba1 Merge: f2809fc5 3d0fae81 Author: Field G. Van Zee Date: Mon Feb 29 12:22:51 2016 -0600 Merge pull request #40 from tkelman/bulldozer-symlink Add symlink from config/bulldozer/kernels to kernels/x86_64/bulldozer commit f2809fc5f74466c755da6a5b4632853e634060b5 Merge: f86b94f2 8624a33c Author: Field G. Van Zee Date: Sat Feb 27 13:06:03 2016 -0600 Merge pull request #39 from devinamatthews/fix_f2c_conflicts Devin's f2c type namespace update. Details: - Added "bla_" prefix to f2c type names to prevent conflicts with external user code. - Removed most of the body of bli_f2c.h, which was unused. commit 3d0fae810d942085d8f2d389820b4e0027577db8 Author: Tony Kelman Date: Thu Feb 25 23:24:03 2016 -0800 Add symlink from config/bulldozer/kernels to kernels/x86_64/bulldozer to fix linking issue mentioned in #37 and https://groups.google.com/forum/#!topic/blis-devel/iypwljcaeEI commit 8624a33ccc12dff6f6c4f92992ca5636af1576a6 Author: Devin Matthews Date: Thu Feb 25 13:51:26 2016 -0600 Fix remaining f2c conflicts. commit 372eef0b6c0a535bf88d4b46b72f61266e8491ba Author: Devin Matthews Date: Thu Feb 25 12:01:58 2016 -0600 Fixed most conflicts after hack-n-slash ofr bli_f2c.h, cleanup in progress. commit f86b94f206e2e09fa3221cc55c3dc5b05ca4775a Author: Field G. Van Zee Date: Tue Feb 23 18:12:34 2016 -0600 Included missing blas2blis integer def to CBLAS. Details: - Added #include "bli_config_macro_defs" to all cblas_*.c files in compat/cblas/src. This has the effect of defining BLIS_BLAS2BLIS_INT_TYPE_SIZE to the default value if bli_config.h does not define it. Thanks to Tony Kelman for reporting this bug. - In cblas_i?amax.c, changed the type of the variable 'iamax' from 'int' to 'f77_int'. This eliminates a compiler warning and a potential runtime bug and/or crash when the size of an int differs from the size of f77_int (as determined by BLIS_BLAS2BLIS_INT_TYPE_SIZE). commit 0b126de1342c11c65623bcb38e258e21e9244e3d Author: Field G. Van Zee Date: Fri Nov 13 16:29:12 2015 -0600 Consolidated packm_blk_var1 and packm_blk_var2. Details: - Consolidated the two blocked variants for packm into a single implementation (packm_blk_var1) and removed the other variant. - Updated all induced method _cntl_init() functions in frame/cntl/ind/ to use the new blocked variant 1. - Defined two new macros, bli_is_ind_packed() and bli_is_nat_packed(), to detect pack_t schemas for induced methods and native execution, respectively. commit 30e5eb29e060b97752f702d2ea5d101d950f53b2 Author: Field G. Van Zee Date: Fri Nov 13 12:14:19 2015 -0600 Minor changes to treatment of rs, cs in bli_obj.c. Details: - Applied a patch submitted by Devin Matthews that: - implements subtle changes to handling of somewhat unusual cases of row and column strides to accommodate certail tensor cases, which includes adding dimension parameters to _is_col_tilted() and _is_row_tilted() macros, - simplifies how buffers are sized when requested BLIS-allocated objects, - re-consolidates bli_adjust_strides_*() into one function, and - defines 'restrict' keyword as a "nothing" macro for C++ and pre-C99 environments. commit f0a4f41b5acf55b41707ec821c4c5f9076dfbc24 Author: Field G. Van Zee Date: Thu Nov 12 15:22:50 2015 -0600 Fixed unimplemented case in core2 sgemm ukernel. Details: - Implemented the "beta == 0" case for general stride output for the dunnington sgemm micro-kernel. This case had been, up until now, identical to the "beta != 0" case, which does not work when the output matrix has nan's and inf's. It had manifested as nan residuals in the test suite for right-side tests of ctrsm4m1a. Thanks to Devin Matthews for reporting this bug. commit 42810bbfa0b8f006ecc5128d903909ec13ea63f9 Author: Field G. Van Zee Date: Thu Nov 12 12:07:46 2015 -0600 Fixed minor bugs for uncommon obj_create cases. Details: - Separated bli_adjust_strides() into _alloc() and _attach() flavors so that the latter can avoid a test performed by the former, in which the rs and cs are overridden and set to zero if either matrix dimension is zero. Actually, we also disable this overridding behavior, even for the _alloc() case, since keeping the original strides (probably) does not hurt anything. The original code has been kept commented-out, though, in case an unintended consequence is later discovered. - Fixed a typo in an error check for general stride cases where rs == cs. commit 3e6dd11467643fbc2cb45c13cec8dd6024232833 Author: Field G. Van Zee Date: Tue Nov 3 10:30:08 2015 -0600 Minor re-expression in quadratic partitioning code. Details: - Minor change to quadratic equation solution code that avoids recomputation of the sqrt() parameter when the compiler is not smart enough to perform this optimization automatically. commit 0694b722f7e4df00efb32639095a2aca80e67f52 Merge: 3e116f0a 33557ecc Author: Field G. Van Zee Date: Mon Nov 2 17:24:25 2015 -0600 Merge branch 'master' of github.com:flame/blis commit 3e116f0a2953f50b3c068759a775ad7ffae04e49 Author: Field G. Van Zee Date: Mon Nov 2 17:18:23 2015 -0600 Fixed imaginary bug in quadratic partitioning code. Details: - Fixed a bug in the relatively new quadratic partitioning code that, under the right conditions, would perform sqrt() on a negative value. If the solution is imaginary, we discard it and use an alternate partition width that assumes no diagonal intersection. That alternate width is actually already computed, so, the fix was quite simple. Thanks to Devangi Parikh for reporting this bug. commit 33557ecccaf49b2569b7f3d7bcea52c2aab94c68 Author: Jeff Hammond Date: Mon Nov 2 12:18:43 2015 -0800 add Travis CI build status icon to the README commit 4a502fbe77bd0f701108baaa559d9cfb483f88de Author: Field G. Van Zee Date: Mon Nov 2 13:28:34 2015 -0600 Laid groundwork for runtime memory pool resizing. Details: - Changed bli_pool_finalize() so that the freeing begins with the block at top_index instead of block 0. This allows us to use the function for terminal finalization as well as temporary cleanup prior to reinitialization. Also, clear the pool_t struct upon _pool_finalize() in case it is called in the terminal case with some blocks still checked out to threads (in which case the threads will see the new block size as 0 and thus release the block as intended). - Added bli_pool_reinit(), which calls _pool_finalize() followed by _pool_init() with new parameters. - Added bli_mem_reinit(), which is based on bli_pool_reinit(). - Added new wrapper, _mem_compute_pool_block_sizes(), which calls _mem_compute_pool_block_sizes_dt(). - Updated bli_mem_release() so that the pblk_t is freed, via _pool_free_block(), if the block size recorded in the mem_t at the time the pblk_t was acquired is now different from the value in the pool_t. commit 37e55ca39bdbddaec03ad30d43e8ad2b3e549c96 Author: Field G. Van Zee Date: Fri Oct 30 18:25:04 2015 -0500 Fixed obscure 3m1/4m1a bugs in trmm[3] and trsm. Details: - Fixed a family of bugs in the triangular level-3 operations for certain complex implementations (3m1 and 4m1a) that only manifest if one of the register blocksizes (PACKMR/PACKNR, actually) is odd: - Fixed incorrect imaginary stride computation in bli_packm_blk_var2() for the triangular case. - Fixed the incorrect computation of imaginary stride, as stored in the auxinfo_t struct in trmm and trsm macro-kernels. - Fixed incorrect pointer arithmetic in the trsm macro-kernels in the cases where the the register blocksize for the triangular matrix is odd. Introduced a new byte-granular pointer arithmetic macro, bli_ptr_add(), that computes the correct value. - Added cpp macro to bli_macro_defs.h for typeof() operator, defined in terms of __typeof__, which is used by bli_ptr_add() macro. - Disabled the row- vs. column-storage optimization in bli_trmm_front() for singleton problems because the inherent ambiguity of whether a scalar is row-stored or column-stored causes the wrong parameter combination code to be executed (by dumb luck of our checking for row storage first). - Added commented-out debugging lines to 3m1/4m1a and reference micro-kernels, and trsm_ll macro-kernel. commit 46294d80e5a79c598e200e1c8ec2a642ff839971 Merge: d3159c57 a0a7b85a Author: Field G. Van Zee Date: Tue Oct 27 12:41:23 2015 -0500 Merge pull request #35 from figual/master Fixed incomplete code in the double precision ARMv8 microkernel. commit a0a7b85ac3e157af53cff8db0e008f4a3f90372c Author: Francisco Igual Date: Tue Oct 27 08:59:15 2015 +0000 Fixed incomplete code in the double precision ARMv8 microkernel. commit d3159c5740c9ee7f8c0b661003aab6f00646ad6f Merge: b489152e 7e03e45b Author: Field G. Van Zee Date: Wed Oct 21 14:54:00 2015 -0500 Merge branch 'master' of github.com:flame/blis commit b489152e112644ec3b6d19e687231a9607f7694f Author: Field G. Van Zee Date: Wed Oct 21 14:53:17 2015 -0500 Use vzeroall in haswell micro-kernels. commit 7e03e45bfe6c27c4fdbf06b1caa7f49e9a5fef49 Merge: 77ddb0b1 4f88c29f Author: Field G. Van Zee Date: Wed Oct 14 13:26:07 2015 -0500 Merge pull request #33 from xianyi/master Enable Travis CI commit 4f88c29f9e634cbb6fb22d8c88931f0ec78ad7db Author: Zhang Xianyi Date: Wed Oct 14 12:57:50 2015 -0500 Detect Intel Broadwell (using Haswell config). commit 4b0ac1a9984a93f7ad4369b10fca63991107d9f5 Merge: fe3e355c 77ddb0b1 Author: Zhang Xianyi Date: Wed Oct 14 12:51:05 2015 -0500 Merge branch 'upstream_master' commit 77ddb0b1d31ada111dadf392766ba6d9210ed9fb Author: Field G. Van Zee Date: Tue Oct 13 12:53:06 2015 -0500 Removed flop-counting mechanism. Details: - Removed the optional flop-counting feature introduced in commit 7574c994. commit 276da366187460a4c8e6e0910e79cb39ce780bfe Author: Field G. Van Zee Date: Mon Oct 12 11:43:03 2015 -0500 Minor formatting change to README.md. commit d17057446f5404824478e8a6cd08f242ab75544a Author: Field G. Van Zee Date: Mon Oct 12 11:39:49 2015 -0500 Added "Getting Started" section to README.md. Details: - Added section to README.md file containing links to wikis with brief descriptions. commit e7e1f2f7b601b21b50e3cdad8972cb3fe11018d3 Author: Field G. Van Zee Date: Fri Oct 2 16:51:52 2015 -0500 Minor updates to CREDITS, README files. commit 55329906ecd7ce1ab910e4d30a29354a9172e7ea Author: Field G. Van Zee Date: Sat Sep 26 20:47:19 2015 -0500 Minor edits to README.md, testsuite. Details: - Fixed typos in README.md. - Fixed column heading alignment for testsuite when matlab output is enabled. - Minor updates to test/3m4m/runme.sh and test/3m4m/Makefile. commit bbebdb5793a8fd6aaf257012ab0272beaa04a0de Author: Field G. Van Zee Date: Fri Sep 25 14:47:27 2015 -0500 Replaced README with README.md. Details: - Replaced the old (and short) README file with a much more comprehensive version written in github-flavored markdown. The new file is based on content taken from the old Google Code homepage. commit e2e9d64a63485461192d9c2a6dd0183a8b71013c Author: Field G. Van Zee Date: Thu Sep 24 12:14:03 2015 -0500 Load balance thread ranges for arbitrary diagonals. Details: - Expanded/updated interface for bli_get_range_weighted() and bli_get_range() so that the direction of movement is specified in the function name (e.g. bli_get_range_l2r(), bli_get_range_weighted_t2b()) and also so that the object being partitioned is passed instead of an uplo parameter. Updated invocations in level-3 blocked variants, as appropriate. - (Re)implemented bli_get_range_*() and bli_get_range_weighted_*() to carefully take into account the location of the diagonal when computing ranges so that the area of each subpartition (which, in all present level-3 operations, is proportional to the amount of computation engendered) is as equal as possible. - Added calls to a new class of routines to all non-gemm level-3 blocked variants: bli__prune_unref_mparts_[mnk]() where is herk, trmm, or trsm and [mnk] is chosen based on which dimension is being partitioned. These routines call a more basic routine, bli_prune_unref_mparts(), to prune unreferenced/unstored regions from matrices and simultaneously adjust other matrices which share the same dimension accordingly. - Simplified herk_blk_var2f, trmm_blk_var1f/b as a result of more the new pruning routines. - Fixed incorrect blocking factors passed into bli_get_range_*() in bli_trsm_blk_var[12][fb].c - Added a new test driver in test/thread_ranges that can exercise the new bli_get_range_*() and bli_get_range_weighted_*() under a range of conditions. - Reimplemented m and n fields of obj_t as elements in a "dim" array field so that dimensions could be queried via index constant (e.g. BLIS_M, BLIS_N). Adjusted/added query and modification macros accordingly. - Defined mdim_t type to enumerate BLIS_M and BLIS_N indexing values. - Added bli_round() macro, which calls C math library function round(), and bli_round_to_mult(), which rounds a value to the nearest multiple of some other value. - Added miscellaneous pruning- and mdim_t-related macros. - Renamed bli_obj_row_offset(), bli_obj_col_offset() macros to bli_obj_row_off(), bli_obj_col_off(). commit fe3e355c9c5a6f65b8736b009e2d501b62a83ea1 Merge: efa641e3 4dd9dd3e Author: Zhang Xianyi Date: Fri Aug 21 14:38:36 2015 -0500 Merge branch 'upstream_master' commit efa641e36b73abee34166a252e90e28a6281d92d Author: Zhang Xianyi Date: Sat Aug 22 03:15:50 2015 +0800 Try to fix the compiling bug on travis. commit 4dd9dd3e1de626b51bfe85d9ee65f193d60e8d38 Author: Field G. Van Zee Date: Fri Aug 21 11:52:37 2015 -0500 Fixed minor alignment ambiguity bug in bli_pool.c. Details: - Fixed a typecasting ambiguity in bli_pool_alloc_block() in which pointer arithmetic was performed on a void* as if it were a byte pointer (such as char*). Some compilers may have already been interpreting this situation as intended, despite the sloppiness. Thanks to Aleksei Rechinskii for reporting this issue. - Redefined pointer alignment macros to typecast to uintptr_t instead of siz_t. commit 12ffd568b04feda57147c13b67717416a01c82f8 Author: Zhang Xianyi Date: Sat Aug 22 00:24:28 2015 +0800 Add Travis CI. commit ecc3ebb749e0861c27deda52b5f87236ede4901b Author: Field G. Van Zee Date: Wed Jul 29 13:31:12 2015 -0500 CHANGELOG update (0.1.8) commit 47caa33485b91ea6f2a5e386e61210c90c5f489f Author: Field G. Van Zee Date: Wed Jul 29 13:31:09 2015 -0500 Version file update (0.1.8) commit ef0fbbbdb6148b96938733fce72cb4ed7dad685e Merge: fdfe14f1 d4b89136 Author: Field G. Van Zee Date: Thu Jul 9 13:54:54 2015 -0500 Merge branch 'master' of github.com:flame/blis commit fdfe14f1e17ba5a2f8dfa0bdb799c6b0e730211b Author: Field G. Van Zee Date: Thu Jul 9 13:52:39 2015 -0500 Added support for Intel Haswell/Broadwell. Details: - Added sgemm and dgemm micro-kernels, which employ 256-bit AVX vectors and FMA instructions. (Complex support is currently provided by default induced method, 4m1a.) - Added a 'haswell' configuration, which uses the aforementioned kernels. - Inserted auto-detection support for haswell configuration in build/auto-detect/cpuid_x86.c. - Modified configure script to explicitly echo when automatic or manual configuration is in progress. - Changed beta scalar in test_gemm.c module of test suite to -1.0 to 0.9. commit d4b891369c1eb0879ade662ff896a5b9a7fca207 Author: Field G. Van Zee Date: Tue Jul 7 10:06:53 2015 -0500 Added 'carrizo' configuration. Details: - Added a new configuration for AMD Excavator-based hardware also known as Carrizo when referring to the entire APU. This configuration uses the same micro-kernels as the piledriver, but with different cache blocksizes. commit 0b7255a642d56723f02d7ca1f8f21809967b8515 Author: Field G. Van Zee Date: Fri Jun 19 12:01:50 2015 -0500 CHANGELOG update (0.1.7) commit 267253de8a7be546ce87626443ee38701c1d411f Author: Field G. Van Zee Date: Fri Jun 19 12:01:49 2015 -0500 Version file update (0.1.7) commit 7cd01b71b5e757a6774625b3c9f427f5e7664a76 Author: Field G. Van Zee Date: Fri Jun 19 11:31:53 2015 -0500 Implemented dynamic allocation for packing buffers. Details: - Replaced the old memory allocator, which was based on statically- allocated arrays, with one based on a new internal pool_t type, which, combined with a new bli_pool_*() API, provides a new abstract data type that implements the same memory pool functionality but with blocks from the heap (ie: malloc() or equivalent). Hiding the details of the pool in a separate API also allows for a much simpler bli_mem.c family of functions. - Added a new internal header, bli_config_macro_defs.h, which enables sane defaults for the values previously found in bli_config. Those values can be overridden by #defining them in bli_config.h the same way kernel defaults can be overridden in bli_kernel.h. This file most resembles what was previously a typical configuration's bli_config.h. - Added a new configuration macro, BLIS_POOL_ADDR_ALIGN_SIZE, which defaults to BLIS_PAGE_SIZE, to specify the alignment of individual blocks in the memory pool. Also added a corresponding query routine to the bli_info API. - Deprecated (once again) the micro-panel alignment feature. Upon further reflection, it seems that the goal of more predictable L1 cache replacement behavior is outweighed by the harm caused by non-contiguous micro-panels when k % kc != 0. I honestly don't think anyone will even miss this feature. - Changed bli_ukr_get_funcs() and bli_ukr_get_ref_funcs() to call bli_cntl_init() instead of bli_init(). - Removed query functions from bli_info.c that are no longer applicable given the dynamic memory allocator. - Removed unnecessary definitions from configurations' bli_config.h files, which are now pleasantly sparse. - Fixed incorrect flop counts in addv, subv, scal2v, scal2m testsuite modules. Thanks to Devangi Parikh for pointing out these miscalculations. - Comment, whitespace changes. commit 9848f255a3bab17d1139c391cca13ff3f1ffe6ed Author: Field G. Van Zee Date: Thu Jun 11 19:14:22 2015 -0500 Added early return to API-level _init() routines. Details: - Added conditional code that returns early from the API-level _init() routines if the API is already initialized. Actually meant for this to be included in 5f93cbe8. commit 5f93cbe870f3478870e15581e7fd450dad5bba1e Author: Field G. Van Zee Date: Thu Jun 11 18:52:12 2015 -0500 Introduced API-level initialization. Details: - Added API-level initialization state to _const, _error, _mem, _thread, _ind, and _cntl APIs. While this functionality will mostly go unused, adding miniscule overhead at init-time, there will be at least once instance in the near future where, in order to avoid an infinite loop, a certain portion of the initialization will call a query function that itself attempts to call bli_init(). API-level initialization will allow this later stage to verify that an earlier stage of initialization has completed, even if the overall call to bli_init() has not yet returned. - Added _is_initialized() functions for each API, setting the underlying bool_t during _init() and unsetting it during _finalize(). - Comment, whitespace changes. commit ee129c6b028bc5ac88da7c74fde72c49803742ff Author: Field G. Van Zee Date: Wed Jun 10 12:53:28 2015 -0500 Fixed bugs in _get_range(), _get_range_weighted(). Details: - Fixed some bugs that only manifested in multithreaded instances of some (non-gemm) level-3 operations. The bugs were related to invalid allocation of "edge" cases to thread subpartitions. (Here, we define an "edge" case to be one where the dimension being partitioned for parallelism is not a whole multiple of whatever register blocksize is needed in that dimension.) In BLIS, we always require edge cases to be part of the bottom, right, or bottom-right subpartitions. (This is so that zero-padding only has to happen at the bottom, right, or bottom-right edges of micro-panels.) The previous implementations of bli_get_range() and _get_range_weighted() did not adhere to this implicit policy and thus produced bad ranges for some combinations of operation, parameter cases, problem sizes, and n-way parallelism. - As part of the above fix, the functions bli_get_range() and _get_range_weighted() have been renamed to use _l2r, _r2l, _t2b, and _b2t suffixes, similar to the partitioning functions. This is an easy way to make sure that the variants are calling the right version of each function. The function signatures have also been changed slightly. - Comment/whitespace updates. - Removed unnecessary '/' from macros in bli_obj_macro_defs.h. commit 9135dfd69d39f3bbd75034f479f27a78dbfebcce Author: Field G. Van Zee Date: Fri Jun 5 13:37:44 2015 -0500 Minor updates to test/3m4m files. commit d62ceece943b20537ec4dd99f25136b9ba2ae340 Author: Field G. Van Zee Date: Wed Jun 3 12:56:45 2015 -0500 Minor update to test/3m4m/runme.sh. Details: - Removed some stale script code that should have been removed during 590bb3b8c. commit b6ee82a3d421c9c4f1eb6848c7c6e37aa46de799 Author: Field G. Van Zee Date: Wed Jun 3 12:14:23 2015 -0500 Minor cleanup to bli_init() and friends. Details: - Spun-off initialization of global scalar constants to bli_const_init() and of threading stuff to bli_thread_init(). - Added some missing _finalize() functions, even when there is nothing to do. commit 1213f5cebabc1637ce9dd45c4bfa87bb93677c29 Author: Field G. Van Zee Date: Tue Jun 2 13:27:47 2015 -0500 POSIX thread bugfixes/edits to bli_init.c, _mem.c. Details: - Fixed a sort-of bug in bli_init.c whereby the wrong pthread mutex was used to lock access to initialization/finalization actions. But everything worked out okay as long as bli_init() was called by single-threaded code. - Changed to static initialization for memory allocator mutex in bli_mem.c, and moved mutex to that file (from bli_init.c). - Fixed some type mismatches in bli_threading_pthreads.c that resulted in compiler warnings. - Fixed a small memory leak with allocated-but-never-freed (and unused) pthread_attr_t objects. - Whitespace changes to bli_init.c and bli_mem.c. commit 590bb3b8c5c0389159c5a9451b6c156c5f237e8a Author: Field G. Van Zee Date: Sun May 24 16:02:53 2015 -0500 Backed-out adjusted dim changes to test/3m4m. Details: - Reverted most changes applied during commit ec25807b. commit ec25807b26da943868f0d0517c3720e50181b8f9 Author: Field G. Van Zee Date: Fri Apr 10 13:23:50 2015 -0500 Tweaks to test/3m4m to test with adjusted dims. Details: - Updated test/3m4m driver files to build test drivers that allow comparision of real "asm_blis" results to complex "asm_blis" results, except with the latter's problem sizes adjusted so that problems are generated with equal flop counts. commit 426b6488580a92bf071a62dc319a9c837ce39821 Author: Field G. Van Zee Date: Wed Apr 8 15:12:21 2015 -0500 Fixed a packing bug that manifested in trsm_r. Details: - Fixed a bug that caused a memory leak in the contiguous memory allocator. Because packm_init() was using simple aliasing when a subpartition object was marked as zeros by bli_acquire_mpart_*(), the "destination" pack object's mem_t entry was being overwritten by the corresponding field of the "source" object (which was likely NULL). This prevented the block from being released back to the memory allocator. But this bug only manifested when changing the location of packing B from outside the var1 loop to inside the var3 loop, and only for trsm with triangular B (side = right). The bug was fixed by changing the type of alias used in packm_init() when handling zero partition cases. Specifically, we now use bli_obj_alias_for_packing(), which does not clobber the destination (pack) object's mem_t field. Thanks to Devangi Parikh for this bug report. commit c84286d5cef48f16d83831baac1f46b9856b9a36 Author: Field G. Van Zee Date: Sat Apr 4 15:39:14 2015 -0500 More minor tweaks to test/3m4m. Details: - Added a line of output that forces matlab to allocate the entire array up-front. - Re-enabled real domain benchmarks in runme.sh, which were temporarily disabled. commit 309717c8ebf4ef1369f15cf41340e13c25b41573 Author: Field G. Van Zee Date: Fri Apr 3 19:28:49 2015 -0500 More tweaks to test/3m4m, configurations. Details: - Fixed incorrect number of mc_x_kc memory blocks in sandybridge/bli_config.h. - Enabled OpenMP multithreding in piledriver/bli_config.h. - More updates to test/3m4m driver files. commit 4baf3b9c69b2f648be9e46e07ccc9859dd675828 Author: Field G. Van Zee Date: Fri Apr 3 16:44:32 2015 -0500 Tweaked test/3m4m driver, including acml support. Details: - Added ACML support to test/3m4m driver Makefile and runme.sh script. commit a32f7c49ca4ea869d2a6c66818780f4321743d67 Merge: 349e075a 4bfd1ce8 Author: Field G. Van Zee Date: Fri Apr 3 08:28:11 2015 -0500 Merge pull request #23 from xianyi/master Add auto-detecting CPU on configure stage. commit 349e075ad6a8e2a1211d94f36d24828c9d44b052 Author: Field G. Van Zee Date: Thu Apr 2 18:12:28 2015 -0500 Tweaks to sandybridge config, test/3m4m driver. Details: - Enable OpenMP support by default in sandybridge's bli_config.h. - Reorganized sandybridge's bli_kernel.h. - Updated 3m4m Makefile, runme.sh to also test MKL implementation. commit 4bfd1ce8ca93f93d170dd2715f0a32027b417b46 Author: Zhang Xianyi Date: Thu Apr 2 16:40:21 2015 -0500 Detect NEON for cortex-a9 and cortex-a15. commit aa6eec4f43137057276fe6119bdbfb5c52682527 Author: Zhang Xianyi Date: Thu Apr 2 16:03:44 2015 -0500 Detect the CPU architecture. Support ARM cores. Detect the CPU architecture by compiler's predefined macros. Then, detect the CPU cores. Support detecting x86 and ARM architectures. commit 2947cfb749c937b0f62fac36cc92f123bd45b53c Author: Zhang Xianyi Date: Wed Apr 1 12:24:00 2015 -0500 Add auto-detecting CPU on configure stage. e.g. /Path_to_BLIS/configure auto Now, it only support detecting x86 CPUs. commit 26a4b8f6f985597f80e0174990bf541f1d9bafac Author: Field G. Van Zee Date: Wed Apr 1 10:44:54 2015 -0500 Implemented 3m2, 3m3 induced algorithms (gemm only). Details: - Defined a new "3ms" (separated 3m) pack schema and added appropriate support in packm_init(), packm_blk_var2(). - Generalized packm_struc_cxk_3mi to take the imaginary stride (is_p) as an argument instead of computing it locally. Exception: for trmm, is_p must be computed locally, since it changes for triangular packed matrices. Also exposed is_p in interface to dt-specific packm_blk_var2 (and _var1, even though it does not use imaginary stride). - Renamed many functions/variables from _3mi to _3mis to indicate that they work for either interleaved or separated 3m pack schemas. - Generalized gemm and herk macro-kernels to pass in imaginary stride rather than compute them locally. - Added support for 3m2 and 3m3 algorithms to frame/ind, including 3m2- and 3m3-specific virtual micro-kernels. - Added special gemm macro-kernels to support 3m2 and 3m3. - Added support for 3m2 and 3m3 to testsuite. - Corrected the type of the panel dimension (pd_) in various macro- kernels from inc_t to dim_t. - Renamed many functions defined in bli_blocksize.c. - Moved most induced-related macro defs from frame/include to frame/ind/include. - Updated the _ukernel.c files so that the micro-kernel function pointers are obtained from the func_t objects rather than the cpp macros that define the function names. - Updated test/3m4m driver, Makefile, and run script. commit ddf62ba7d2da08225b201585b85e06c967767dea Author: Tyler Smith Date: Fri Mar 27 14:27:51 2015 -0500 Refuse to free the packm thread info if it uses the single threaded version commit 016fc587584d958a0e430a56a5e2c05022ac2f17 Author: Tyler Smith Date: Fri Mar 27 14:23:02 2015 -0500 Don't free packm thread info if it is null commit 00a443c529a60862a57b93e303a0b3212c9b1df4 Author: Tyler Smith Date: Fri Mar 27 14:11:07 2015 -0500 Use bli_malloc instead of malloc for the thread info paths commit f1a6b7d02861ccebdc500ea98778cc0f6cddad17 Author: Field G. Van Zee Date: Wed Mar 18 15:37:10 2015 -0500 Reorganized code for induced complex methods. Details: - Consolidated most of the code relating to induced complex methods (e.g. 4mh, 4m1, 3mh, 3m1, etc.) into frame/ind. Induced methods are now enabled on a per-operation basis. The current "available" (enabled and implemented) implementation can then be queried on an operation basis. Micro-kernel func_t objects as well as blksz_t objects can also be queried in a similar maner. - Redefined several micro-kernel and operation-related functions in bli_info_*() API, in accordance with above changes. - Added mr and nr fields to blksz_t object, which point to the mr and nr blksz_t objects for each cache blocksize (and are NULL for register blocksizes). Renamed the sub-blocksize field "sub" to "mult" since it is really expressing a blocksize multiple. - Updated bli_*_determine_kc_[fb]() for gemm/hemm/symm, trmm, and trsm to correctly query mr and nr (for purposes of nudging kc). - Introduced an enumerated opid_t in bli_type_defs.h that uniquely identifies an operation. For now, only level-3 id values are defined, along with a generic, catch-all BLIS_NOID value. - Reworked testsuite so that all induced methods that are enabled are tested (one at a time) rather than only testing the first available method. - Reformated summary at the beginning of testsuite output so that blocksize and micro-kernel info is shown for each induced method that was requested (as well as native execution). - Reduced the number of columns needed to display non-matlab testsuite output (from approx. 90 to 80). commit 8d5169ccda954e5f72944308a036dcb7ebfc9097 Author: Field G. Van Zee Date: Wed Mar 18 11:38:08 2015 -0500 Fixed bug in release of mem_t buffer. Details: - Fixed a bug that affects all level-2 and level-3 blocked variants. The bug only manifested, however, if the packing of operands (A and B in gemm, for example) spanned multiple nodes in the control tree. Until recently, the main consumers of packm were level-3 operations, all of which packed both input operands from blocked variant 1 (B outside of the loop, and A within the loop). This particular usage masked a flaw in the code whereby bli_obj_release_pack() would always release the underlying mem_t buffer (provided it was allocated), even if the buffer was not allocated in the current variant. This has been fixed by replacing all calls to bli_obj_release_pack() with calls to a new function, bli_packm_release(), which takes the same control tree node argument passed into the object's corresponding call to packm_init() or packv_init(). bli_packm_release() then proceeds to invoke bli_obj_release_pack() only if the control tree node indicates that packing was requested. Thanks to Devangi Parikh for identifying this bug. commit c0acca0f5182ba96fd39c9d10b34a896a6e74206 Author: Field G. Van Zee Date: Tue Mar 3 10:56:22 2015 -0600 Clarified comments in testsuite input.operations. commit 03ba9a6b17861d9e1adc0cf924439c4d7e860d19 Author: Field G. Van Zee Date: Tue Feb 24 10:33:28 2015 -0600 Removed some 'old' directories. commit a86db60ee270cdeb745ae7cf68f9e0becc9f522d Author: Field G. Van Zee Date: Mon Feb 23 18:42:39 2015 -0600 Extensive renaming of 3m/4m-related files, symbols. Details: - Renamed all remaining 3m/4m packing files and symbols to 3mi/4mi ('i' for "interleaved"). Similar changes to 3M/4M macros. - Renamed all 3m/4m files and functions to 3m1/4m1. - Whitespace changes. commit 8cf8da291a0fb2f491f410969a76ec0fbda47faf Author: Field G. Van Zee Date: Fri Feb 20 15:24:27 2015 -0600 Minor updates to induced complex mode management. Details: - Relocated bli_4mh.c, bli_4mb.c, bli_4m.c, bli_3mh.c, bli_3m.c (and associated headers) from frame/base to frame/base/induced. - Added bli_xm.? to frame/base/induced, which implements bli_xm_is_enabled(), which detects whether ANY induced complex method is currently enabled. - The new function bli_xm_is_enabled() is now used in bli_info.c to detect when an induced complex method is used, so we know when to return blocksizes from one of the induced methods' blocksize objects. commit 411e637ee7d1083a84f58f08938d51e63d7c3c9a Merge: c2569b88 fc0b7712 Author: Tyler Michael Smith Date: Fri Feb 20 20:39:25 2015 -0600 Merge branch 'master' of http://github.com/flame/blis commit c2569b8803d4ccc1d7b6f391713461b51443601d Author: Tyler Michael Smith Date: Fri Feb 20 20:38:19 2015 -0600 Fixed a memory leak in freeing the thread infos commit fc0b771227abf86d81f505b324f69f6e83db1d8f Author: Field G. Van Zee Date: Fri Feb 20 11:47:44 2015 -0600 Added max(mr,nr) to kc in static mem pools. Details: - Changed the static memory definitions to compute the maximum register blocksize for each datatype and add it to kc when computing the size of blocks of A and B. This formally accounts for the nudging of kc up to a multiple of mr or nr at runtime for triangular operations (e.g. trmm). commit af32e3a608631953ef770341df10a14a991bf290 Author: Tyler Michael Smith Date: Thu Feb 19 22:51:11 2015 -0600 Fixed a bug with get_range_weighted would return end = 0 for small problem sizes commit 441d47542a64e131578d00da7404c1ed387a721c Author: Field G. Van Zee Date: Thu Feb 19 17:06:10 2015 -0600 Renamed 3m and 4m symbols/macros to 3mi and 4mi. Details: - Renamed several variables and macros from 3m/4m to 3mi/4mi. This is because those packing schemas were always implicitly "interleaved". This new naming scheme will make way for new schemas that separate instead of interleve the real and imaginary (and summed) parts. - Expanded the pack format sub-field of the pack schema field of the info_t to 4 bits (from 3). This will allow for more schema types going forward. - Removed old _cntl.c files for herk3m, herk4m, trmm3m, trmm4m. commit 518a1756ccf02122b96fc437b538604a597df42a Author: Field G. Van Zee Date: Thu Feb 19 14:27:09 2015 -0600 Fixed indexing bug for trmm3 via 3mh, 4mh. Details: - Fixed a bug that only affected trmm3 when performed via 3mh or 4mh, whereby micro-panels of the triangular matrix were packed with "dead space" between them due to failing to adjust for the fact that pointer arithmetic was occurring in units of complex elements while the data being packed consisted of real elements. It turns out that the macro- kernel suffered from the same bug, meaning the panels were actually being packed and read consistently. The only way I was able to discover the bug in the first place was because the packed block of A was overflowing into the beginning of the packed row panel of B using the sandybridge configuration. commit 493087d730f01d5169434f461644e5633f48a42f Merge: 650d2a6f 25021299 Author: Field G. Van Zee Date: Wed Feb 18 09:45:51 2015 -0600 Merge branch 'master' of github.com:flame/blis commit 25021299b670775df8ca9c87910c63d7e74ed946 Merge: fe2b8d39 f05a5763 Author: Field G. Van Zee Date: Wed Feb 11 20:03:21 2015 -0600 Merge branch 'master' of github.com:flame/blis commit fe2b8d39a445ac848686e78c7540fd046cb95492 Author: Field G. Van Zee Date: Wed Feb 11 19:33:10 2015 -0600 Fixed an obscure bug in 3mh/3m/4mh/4m packing. Details: - Modified bli_packm_blk_var1.c and _var2.c to increase the triangular case's panel increment by 1 if it would otherwise be odd. This is particularly necessary in _var2.c when handling the interleaved 3m or ro/io/rpi pack schemas, since division of an odd number by 2 can happen if both the panel length and the panel packing dimension (register packing blocksize) are odd, thus making their product odd. - Modified bli_packm_init.c so that panel strides are increased by 1 if they would otherwise be odd, even for non-3m related packing. - Modified the trmm and trsm macro-kernels so that triangular packed micro-panels are traversed with this new "increment by 1 if odd" policy. - Added sanity checks in trmm and trsm macro-kernels that would result in an abort() if the conditions that would lead to a "divide odd integer by 2" scenario ever manifest. - Defined bli_is_odd(), _is_even() macros in bli_scalar_macro_defs.h. commit 650d2a6ff2e593151a296ca86b5214afcc747afc Author: Field G. Van Zee Date: Mon Feb 9 14:59:20 2015 -0600 Added initial support for imaginary stride. Details: - Added an imaginary stride field ("is") to obj_t. - Renamed bli_obj_set_incs() macro to bli_obj_set_strides(). - Defined bli_obj_imag_stride() and bli_obj_set_imag_stride() and added invocations in key locations. - Added some basic error-checking related to imaginary stride. - For now, imaginary stride will not be exposed into the most-used BLIS APIs such as bli_obj_create(), and certainly not the computational APIs such as bli_dgemm(). commit f05a57634a7c8e3864b25b3335d1194c1ea1aeb9 Author: Field G. Van Zee Date: Sun Feb 8 19:40:34 2015 -0600 Defined gemm cntl function to query ukrs func_t. Details: - Added a new function, bli_gemm_cntl_ukrs(), that returns the func_t* for the gemm micro-kernels from the leaf node of the control tree. This allows all the func_t* fields from higher-level nodes in the tree to be NULL, which makes the function that builds the control trees slightly easier to read. - Call bli_gemm_cntl_ukrs() instead of the cntl_gemm_ukrs() macro in all bli_*_front() functions (which is needed to apply the row/column preference optimization). - In all level-3 bli_*_cntl_init() functions, changed the _obj_create() function arguments corresponding to the gemm_ukrs fields in higher- level cntl tree nodes to NULL. - Removed some old her2k macro-kernels. commit cefd3d5d2001264de17cf63dae541f890cb9daaf Author: Tyler Smith Date: Thu Feb 5 11:09:12 2015 -0600 A couple of functions were incorrectly ifdeffed away on Xeon Phi. Fixed this commit 7574c9947d57a19f613880e3b9f62f8c8f6df4ec Author: Field G. Van Zee Date: Wed Feb 4 12:11:55 2015 -0600 Added basic flop-counting mechanism (level-3 only). Details: - Added optional flop counting to all level-3 front-ends, which is enabled via BLIS_ENABLE_FLOP_COUNT. The flop count can be reset at any time via bli_flop_count_reset() and queried via bli_flop_count(). Caveats: - flop counts are approximate for her[2]k, syr[2]k, trmm, and trsm operations; - flop counts ignore extra flops due to non-unit alpha; - flop counts do not account for situations where beta is zero. commit ceda4f27d1f1bcf19320e09848e0f2e3b9941e6c Author: Field G. Van Zee Date: Thu Jan 29 13:22:54 2015 -0600 Implemented bli_obj_imag_equals(). Details: - Implemented a new function, bli_obj_imag_equals(), which compares the imaginary part of the first argument to the second argument, which may be a BLIS_CONSTANT or of a regular real datatype. commit 81114824a05a9053229efd577a8a94a856deda93 Author: Field G. Van Zee Date: Tue Jan 6 12:15:21 2015 -0600 Minor 4m/3m consolidation to mem_pool_macro_defs.h. Details: - Merged the 4m and 3m definitions in bli_mem_pool_macro_defs.h to reduce code and improve readability. commit 36a9b7b7436d9423ba4de2a9f85cfcd43577b783 Author: Tyler Michael Smith Date: Wed Dec 17 21:53:50 2014 +0000 reduced the default number of MC by KC blocks for bgq commit c60619c7c3568f044a849abbab60209aa7455423 Author: Field G. Van Zee Date: Tue Dec 16 17:08:22 2014 -0600 Minor tweaks for 3m4m test drivers. Details: - Changed gemm_kc blocksizes to be reduced by two-thirds instead of half. - Changed 3m4m/test_gemm.c driver to divide by 3 instead of 2 when computing the fixed k dimension. - Fixed runme.sh so that it would use multiple threads for s/dgemm cases. commit c6929ba6a5e6f633a7295e979a2b8df8c7ecdb1b Author: Field G. Van Zee Date: Tue Dec 16 11:27:50 2014 -0600 Added 4m_1b to test/3m4m test driver and script. commit 785d480805fc0d6f4251b5499933515740b6b2a7 Merge: 9456f330 4156c088 Author: Field G. Van Zee Date: Fri Dec 12 14:34:19 2014 -0600 Merge branch 'master' of github.com:flame/blis commit 9456f330af4617f9ee32972d51f974aa2d84f97b Author: Field G. Van Zee Date: Fri Dec 12 14:31:57 2014 -0600 Added 4m_1b implementation for gemm. Details: - Added yet another 4m-based implementation for complex domain level-3 operations. This method, which the 3m/4m paper identifies as Algorithm "4m_1b" fissures the first loop around the micro-kernel so that the real sub-panel of the current micro-panel of B is multiplied against (both sub-panels of) all micro-panels of A, before doing the same for the imaginary sub-panel of the micro-panel of B. For now, only gemm is supported, and 4m_1b (labeled "4mb" within the framework) is not yet integrated into the test suite. commit 4156c0880d9aea4ff04a9c4fa139ba8c437d8bfb Author: Field G. Van Zee Date: Tue Dec 9 16:03:14 2014 -0600 Fixed obscure level-2 packing / general stride bug. Details: - Fixed a bug in certain structured level-2 operations that manifested only when the structured matrix was provided to BLIS as matrix stored with general stride. The bug was introduced in c472993b when the densify field was removed from the packm control tree node and associated APIs. Since then, the packed object was unconditionally marked with an uplo field of BLIS_DENSE. This is fine for level-3 operations where micro-panels are always densified, but in level-2 contexts, the underlying unblocked variant (fused or unfused) of structured operations (e.g. trmv) still needs to know whether to execute its "lower" or "upper" branches of code. Since this field was unconditionally being set to BLIS_DENSE, the unblocked variants were always executed the "else" branch, which happened to be the "lower" case code. Thus, running an upper case produced the wrong answer. This most obviously manifested in the form of failures for trmm, trmm3, and trsm in the test suite. The bug was fixed by setting the packed object's uplo field to BLIS_DENSE only if the schema indicated that micro-panels were to be packed. Otherwise, we can assume we are packing to regular row or column storage, as is the case with level-2 packing. Thanks to Francisco Igual for reporting the testsuite failures and ultimately leading us to this bug. commit 689f60a578b461119e9ea90c74f642b9eb79addb Merge: bef24e67 483e4d6a Author: Field G. Van Zee Date: Sun Dec 7 14:03:30 2014 -0600 Merge pull request #21 from figual/master Adding armv8a configuration and micro-kernels. commit 483e4d6a3fdbef9d9ab47fb674c9476c70ca9f0f Author: Francisco D. Igual Date: Sun Dec 7 20:27:49 2014 +0100 Adding armv8a configuration and micro-kernels. Only sgemm micro-kernel is fully functional at this point. commit bef24e67e0f93579c2a80315348dc2e227f72a72 Author: Tyler Smith Date: Wed Nov 26 18:00:56 2014 -0600 Fixed a type of race condition exposed by pthreads implementation. Lead thread of the inner thread communicator could exit subproblem, move on the next iteration of the loop and modify a1_pack, b1_pack, or c1_pack while other threads were still using those. Barriers were inserted to fix this. commit 76bde44411f0e34266bab9d666a54ef22be97320 Merge: e56e6143 f3d729e5 Author: Field G. Van Zee Date: Wed Nov 26 17:25:24 2014 -0600 Merge branch 'master' of github.com:flame/blis commit f3d729e504ec012e7dc7e02b2ecd42e004c6894d Author: Tyler Michael Smith Date: Wed Nov 26 22:25:24 2014 -0600 Added static mutex to bli_init and bli_finalize commit d71cc797866ff502ad1127527016f463267eef80 Author: Tyler Michael Smith Date: Wed Nov 26 21:35:39 2014 -0600 Refactored bli_threading files and added support for pthreads commit e56e61438ff7fcf25a48c0b7603f18df782b50b6 Author: Field G. Van Zee Date: Wed Nov 26 17:20:35 2014 -0600 Minor cleanups to bli_threading.h and friends. Details: - No longer need to define BLIS_ENABLE_MULTITHREADING manually in bli_config.h; it now gets defined when BLIS_ENABLE_OPENMP or BLIS_ENABLE_PTHREADS is defined. - Added sanity check to prevent both BLIS__ENABLE_OPENMP and BLIS_ENABLE_PTHREADS from being enabled simultaneously. - Reorganization of bli_threading*.h header files, which led to simplification of threading-related part of blis.h. - added "-fopenmp -lpthread" to LDFLAGS of sandybridge make_defs.mk file. commit 3be2744cbe2c56d38c23fd818aa5c1f10cc7ea51 Author: Field G. Van Zee Date: Fri Nov 21 12:28:08 2014 -0600 Update to template gemm ukernel comments. Details: - Updated comments on alignment of a1 and b1 to match wiki. commit 994429c6881b2ade92d9d7949bcaebfbf2cc65eb Merge: 58796abd 694029d9 Author: Field G. Van Zee Date: Thu Nov 20 13:55:35 2014 -0600 Merge pull request #20 from TimmyLiu/master #define PASTEF773 required by cblas compatibility layer commit 694029d9d7db857d642ab536955c0621791108c8 Author: Timmy Date: Wed Nov 19 15:25:14 2014 -0600 #define PASTEF773 required by cblas compatiility layer commit 58796abda66b133346f8d523b39178afc336351f Author: Field G. Van Zee Date: Thu Nov 6 14:31:52 2014 -0600 Removed KC constraint comments from _kernel.h files. Details: - Since 4674ca8c, the constraint that KC be a multiple of both MR and NR have been relaxed, and thus it was time to remove the comments from the top of the bli_kernel.h files of all configurations. commit 7bbc95a54f706d43c7f7951f0e5995f86130cd52 Author: Field G. Van Zee Date: Wed Oct 29 10:52:23 2014 -0500 Added new piledriver micro-kernels. Details: - Added new micro-kernels for the AMD piledriver architecture (one for each datatype). - Updates and tweaks to piledriver configuration. - Added 3xk packm micro-kernel support. - Explicitly unrolled some of the smaller packm micro-kernels. - Added notes to avx/sandybridge and piledriver micro-kernel files acknowledging the influence of the corresponding kernel code in OpenBLAS. commit 59613f1d5500f6279963327db2fbc84bc9135183 Author: Field G. Van Zee Date: Thu Oct 23 17:21:37 2014 -0500 Added separeate micro-panel alignment for A and B. Details: - Changed the recently-added micro-panel alignment macros so that we now have two sets--one for micro-panels of matrix A and one for micro- panels of matrix B: BLIS_UPANEL_[AB]_ALIGN_SIZE_?. - Store each set of alignment values into a separate blksz_t object in bli_gemm_cntl_init(). - Adjusted packm_init() to use the separate alignment values. - Added query routines for the new alignment values to bli_info.c. - Modified test suite output accordingly. commit a8e12884ee1fddd3fd77ca5a68aa0cb857f3af57 Author: Field G. Van Zee Date: Thu Oct 23 11:35:48 2014 -0500 CHANGELOG update (0.1.6) commit 38ea5022e4ed846112198c4e1672fcdaeb90dc71 Author: Field G. Van Zee Date: Thu Oct 23 11:35:45 2014 -0500 Version file update (0.1.6) commit a3e6341bdb0e28411f935d6b4708a6389663e004 Author: Field G. Van Zee Date: Thu Oct 23 11:13:28 2014 -0500 Factored common code from blocksize functions. Details: - Split bli_determine_blocksize_[fb]() into two functions each, the newer ones ending with the _sub suffix. These new sub-functions are now called from bli_[gemm|trmm|trsm]_determine_kc_[fb](), which eliminates redundant code and will allow any future tweaks to the core sub-functions to automatically be inherited by the operation- specific versions. commit 4674ca8cffb58331ff7edf23bbe0e3f6a7558489 Author: Field G. Van Zee Date: Thu Oct 23 10:50:59 2014 -0500 Extended newly relaxed KC to hemm, symm. Details: - These changes were intended for the previous commit. - Defined bli_gemm_determine_kc_[fb]() and bli_gemm_determine_kc_[fb](), which determine blocksizes for gemm-based operations, taking special care to "nudge" the kc dimension up to a multiple of MR or NR for hemm and symm operations, as needed. - Changed bli_gemm_blk_var3f.c to call bli_gemm_determine_kc_f(). instead of bli_determine_blocksize_f(). - Comment updates to bli_trmm_blocksize.c, bli_trsm_blocksize.c. commit ab954ba6f874eaca7b001804491f866ef6b9b327 Author: Field G. Van Zee Date: Wed Oct 22 17:21:58 2014 -0500 Relaxed constraint that KC be multiple of MR, NR. Details: - Relaxed a long-held requirement in register blocksizes that required the kernel programmer to choose a KC that was divisible by both MR and NR. This was very constraining on some architectures that did not use register blocksizes that were powers of two. The constraint is now enforced only for trmm and trsm, where it is needed, and it is now handled by "nudging" kc upward at runtime, if necessary, to be a multiple of MR or NR, as needed. - Defined bli_trmm_determine_kc_[fb]() and bli_trsm_determine_kc_[fb](), which determine blocksizes for trmm and trsm, taking special care to "nudge" the kc dimension up to a multiple of MR or NR, as needed. - Changed bli_trmm_blk_var3[fb].c to call bli_trmm_determine_kc_[fb]() instead of bli_determine_blocksize_[fb](). - Added safeguard to bli_align_dim_to_mult() that returns the dimension unmodified if the dimension multiple is zero (to avoid division by zero). - Removed cpp guard/check for KC % MR == 0 and KC % NR == 0 from bli_kernel_macro_defs.h. - Whitespace, variable name changes to bli_blocksize.c. - Removed old commented code from bli_gemm_cntl.c. commit 95cdae65d6b88e043ee14bcd53cd2e800d7aecb4 Author: Tyler Smith Date: Wed Oct 22 16:30:16 2014 -0500 Fixed bug in KNC microkernel where k=0 and beta != 1 commit e64dba5633fc49b768b5edc7762f2b5d8a4d0588 Author: Field G. Van Zee Date: Mon Oct 20 19:23:06 2014 -0500 Re-implemented micro-panel alignment. Details: - This commit re-implements a feature that was removed in commit c2b2ab62. It was removed because, at the time, I wasn't sure how the micro-panel alignment feature would interact with the 4m method (when applied at the micro-kernrel level), and so it seemed safer to disable the feature entirely rather than allow possible breakage. This commit revisits the issue and safely re-implements the feature in a way that is compatible with 4m, 3m, 4mh, and 3mh (and native execution). - Modified the static memory pool to account for micro-panel alignment space. - Modified packm_init and blocked variants to align whole micro-panels by a datatype-specific alignment value that may be set by the configuration. (If it is not set by the configuration, it will default to BLIS_SIZEOF_?.) - Modified macro-kernels so that: - storage stride is handled properly given the new micro-panel alignment behavior; - indexing through 3m/4m/rih-type sub-panels, as is done by trmm and trsm, is more robust (e.g. will work if the applicable packing register blocksize is odd); - imaginary strides are computed and stored within auxinfo_t structs, which allows the virtual micro-kernels to more easily determine how to index into the micro-panel operands. - Modified virtual 3m and 4m micro-kernels to use the imaginary strides within the auxinfo_t structs instead of panel strides. - Deprecated the panel stride fields from the auxinfo_t structs. - Updated test suite to print out the micro-panel alignment values. commit add16b0e5402924301e7078e4ca5e3ef725bff0b Author: Field G. Van Zee Date: Fri Oct 17 11:49:24 2014 -0500 Added 3m4m test driver subdir of 'test'. Details: - Added a modified test driver for [cz]gemm that will test all 3m/4m as well as assembly-based and OpenBLAS implementations of gemm in single and multithreaded modes. commit e171504a72406c61a173241d8bccf0a5ceb10582 Author: Field G. Van Zee Date: Fri Oct 17 11:25:59 2014 -0500 Use correct definition of bli_is_last_iter(). Details: - As intended for previous commit, the new definition of bli_is_last_iter() is now disabled in favor of the old definition. commit 0d954087b2b55d2f5f3c5e57d702b318ca2300f6 Author: Field G. Van Zee Date: Fri Oct 17 11:19:34 2014 -0500 Minor changes and fixes. Details: - Redefined bli_is_last_iter() to take thread_id and num_thread arguments, which allows the macro to correctly compute whether a given iteration is the last that the thread will compute in that particular loop. The new definition, however, remains disabled (commented out) until someone can look at this more closely, as the new definition seems to actually hurt performance slightly. - Whitespace and related updates to level-3 macro-kernels. - Updated test suite so that performance results in the hundreds of gigaflops does not disrupt the column alignment of the output. commit d1e86e1876e433f54b501ec5a005b4ba7c5ce4e6 Author: Field G. Van Zee Date: Sun Oct 12 13:43:47 2014 -0500 More minor tweaks to sandybridge/avx micro-kernel. Details: - Re-enabled use of b_next for dgemm and cgemm micro-kernels. commit 7b6fe4cae57cb22c09c1a97595e1a201a02cbcd2 Author: Field G. Van Zee Date: Sun Oct 12 12:01:51 2014 -0500 Minor tweaks to sandybridge/avx micro-kernels. Details: - Changed the MC blocksize for zgemm micro-kernel from 128 to 64. - Removed usage of b_next in all x86_64/avx gemm micro-kernels. commit a6a156e9feec47154e7a0fd43bcc006b1fc04aba Author: Field G. Van Zee Date: Fri Oct 10 14:26:41 2014 -0500 Added cgemm ukernel for avx/sandybridge. Details: - Implemented AVX-based cgemm micro-kernel (via GNU extended inline assembly syntax). - Updated sandybridge configuration accordingly. commit 6f8575ab2580e167a022293b76ddf0514f71b613 Author: Field G. Van Zee Date: Fri Oct 10 10:01:45 2014 -0500 Added zgemm ukernel for avx/sandybridge. Details: - Implemented AVX-based zgemm micro-kernel (via GNU extended inline assembly syntax). - Updated sandybridge configuration accordingly. commit 23ce7ee542a12ca40b4b6090ad2558d180e16d37 Merge: 99fd9a39 7a8ad47f Author: Field G. Van Zee Date: Thu Oct 9 16:41:22 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 99fd9a39718cb7281f6fb23f9fef7cca4fe514f4 Author: Field G. Van Zee Date: Thu Oct 9 16:38:04 2014 -0500 Fixed two minor bugs. Details: - Fixed a bug in the test suite for the trsm_ukr and gemmtrsm_ukr test modules whereby the uplo bits of some packed matrix objects were not being set properly, resulting in false FAILURE results for those tests. Thanks to Tyler Smith for bringing this issue to my attention. - Fixed a bug in bli_obj_alloc_buffer() that caused an unnecessary "not yet implemented" abort() when creating a 1x1 object with non-unit strides. commit 7a8ad47fb2d100a9da93aa8cab774fcceeaab733 Author: Tyler Smith Date: Wed Oct 8 15:52:13 2014 -0500 Minor changes to knc configuration, including preference row major storage Also fixed a bug in the knc micro-kernel where it would fail if k == 0 commit 76b7c34af0c09f47d9615b18857a356acddc788a Author: Field G. Van Zee Date: Thu Oct 2 14:15:38 2014 -0500 Fixed a bug in the pack schema-related bit macros. Details: - Expanded the BLIS_PACK_SCHEMA_BITS value in bli_type_defs.h to include all six bits presently used in the pack schema bitfield of the info field of obj_t structs. Prior to this commit, the macro constant only included the lowest five bits, which excluded the "is or is not packed" bit. This manifested as a strange bug in probably many level-2 codes that invoked packing, though we only observed it in ger before fixing. Thanks to Devin Matthews for finding and reporting this bug. commit a5763e332226598d70c47dfa9cad4578e15ef5f4 Author: Field G. Van Zee Date: Thu Oct 2 13:28:17 2014 -0500 Added extra output to bli_obj_print(). Details: - Print extra values from info field of obj_t struct within bli_obj_print(). commit 9bba209fc44fbfce943ba6a51cd8278a0cb6b159 Author: Tyler Smith Date: Mon Sep 29 14:56:36 2014 -0500 Fixed bug when packing anywhere besides in blk_var_1 for gemm. commit 614a4afc9272adb47e5a8b83b39d56c2804d95d6 Merge: b541b667 4a7df04e Author: Tyler Smith Date: Fri Sep 26 10:49:57 2014 -0500 Merge branch 'master' of http://github.com/flame/blis commit 4a7df04e8a4ffdb9561d26426afd35e4fe15b013 Author: Field G. Van Zee Date: Mon Sep 22 16:06:15 2014 -0500 Added 30xk support for packm ukernels. Details: - Updated bli_kernel_*_macro_defs.h headers to include default definitions for 30xk packm kernels. - Extended function pointer arrays in bli_packm_cxk_*() out to 31 and included 30xk kernels. - Addex 30xk kernels to frame/1m/packm/ukernels/bli_packm_ref_cxk_*.c. commit b6d4bd792e0d44ce4b28afef343f5ff3ba89c285 Author: Field G. Van Zee Date: Mon Sep 22 16:02:37 2014 -0500 Fixed missing tabs from Makefile patch. commit 32630f9b6f0d5ba28d5b56dae4c7288a37158743 Author: Field G. Van Zee Date: Fri Sep 19 17:18:20 2014 -0500 Comment update to virtual micro-kernels. commit 13447cffead7c6d137a7a3ccbf9e552ed0477467 Author: Field G. Van Zee Date: Fri Sep 19 13:00:48 2014 -0500 Minor bugfix to top-level Makefile. Details: - Applied a patch that allows the top-level Makefile to work on certain systems. The patch simply separates out the source-to-object code generation rules for .c and .S files into two separate rules. Thanks to Devin Matthews for submitting this patch. commit e80a4537846416719c067ae08a53aeda978c572d Author: Field G. Van Zee Date: Thu Sep 18 10:24:20 2014 -0500 Fixed bug introduced by bugfix in 25b258d. Details: - We actually need to check alignment of lda*sizeof(double) and NOT a+lda because in the latter case, alignment could cancel out and still allow the optimized code to run when it shouldn't. Thanks to Devin for pointing this out. commit 25b258d61f9c8cee64e922f4131784b6edb196dd Author: Field G. Van Zee Date: Thu Sep 18 10:10:49 2014 -0500 Fixed a non-fatal problem with bugfix in a68b316c. Details: - The bugfix in a68b316c was inadvertantly checkin alignment of the leading dimension itself, rather than the byte size of the leading dimension. Now, we simply check alignment of a+lda. commit 96302d4fc81363410e41c3a3c43a65df44d97ad9 Author: Field G. Van Zee Date: Thu Sep 18 09:43:40 2014 -0500 Renamed bli_info_get_*_ukr_type() functions. Details: - Added _string() suffix to bli_info_get_*_ukr_type() function names. This makes them consistent with the bli_info_get_*_impl_string() functions. commit a68b316ca4852509f84ed50e01afac486bf70f58 Author: Field G. Van Zee Date: Wed Sep 17 11:10:07 2014 -0500 Fixed alignment bugs in level-1f kernels. Details: - Fixed bugs whereby the level-1f dotxf, axpyxf, and dotxaxpyf kernels were attempting to compute problems with unaligned leading dimensions with optimized code, rather than (correctly) using the reference implementations. Thanks to Devin Matthews for reporting this bug. commit 870761eb902e4866090d1d3446a345df3d6d4599 Merge: e9899be0 a2b59a37 Author: Field G. Van Zee Date: Tue Sep 16 18:20:49 2014 -0500 Merge branch 'master' of github.com:flame/blis commit e9899be09044829e23386bd73e394f1dd7778210 Author: Field G. Van Zee Date: Tue Sep 16 18:19:32 2014 -0500 Added high-level implementations of 4m, 3m. Details: - Added "4mh" and "3mh" APIs, which implement the 4m and 3m methods at high levels, respectively. APIs for trmm and trsm were NOT added due to the fact that these approaches are inherently incompatible with implementing 4m or 3m at high levels (because the input right-hand side matrix is overwritten). - Added 4mh, 3mh virtual micro-kernels, and updated the existing 4m and 3m so that all are stylistically consistent. - Added new "rih" packing kernels (both low-level and structure-aware) to support both 4mh and 3mh. - Defined new pack_t schemas to support real-only, imaginary-only, and real+imaginary packing formats. - Added various level0 scalar macros to support the rih packm kernels. - Minor tweaks to trmm macro-kernels to facilitate 4mh and 3mh. - Added the ability to enable/disable 4mh, 3m, and 3mh, and adjusted level-3 front-ends to check enabledness of 3mh, 3m, 4mh, and 4m (in that order) and execute the first one that is enabled, or the native implementation if none are enabled. - Added implementation query functions for each level-3 operation so that the user can query a string that describes the implementation that is currently enabled. - Updated test suite to output implementation types for reach level-3 operation, as well as micro-kernel types for each of the five micro- kernels. - Renamed BLIS_ENABLE_?COMPLEX_VIA_4M macros to _ENABLE_VIRTUAL_?COMPLEX. - Fixed an obscure bug when packing Hermitian matrices (regular packing type) whereby the diagonal elements of the packed micro-panels could get tainted if the source matrix's imaginary diagonal part contained garbage. commit a2b59a37f166f70a6dd5793db2530823ef590c2b Author: Tyler Smith Date: Mon Sep 15 10:44:44 2014 -0500 Fixed make defs so that they actually compile for bulldozer commit 86fc7e40764f78ec217f50216ef4fa5b57dbfbc7 Author: Tyler Smith Date: Mon Sep 15 10:35:46 2014 -0500 Added bulldozer configuration and updated piledriver micro-kernel commit 0644e61a79a57f136be5f4c47b9099cff2af06e0 Author: Field G. Van Zee Date: Thu Sep 11 12:55:34 2014 -0500 Minor updates to bli_packm_init.c. commit 9dc9b44a057a08e20ad4d423344f0ecad54c1eb2 Author: Field G. Van Zee Date: Thu Sep 11 12:03:28 2014 -0500 Renamed bli_obj_pack_status() to _pack_schema(). Details: - Renamed the bli_obj_pack_status() macro to bli_obj_pack_schema() in order to help avoid confusion as to what the macro returns. commit cf5efdde0588a0d5b6ea57fe7d7be5000be06f8e Author: Field G. Van Zee Date: Thu Sep 11 11:47:56 2014 -0500 Pass pack_t schemas into ukernels via auxinfo_t. Details: - Modified macro-kernels to pass the pack_t schema values for matrices A and B into the datatype-specific functions, where they are now inserted into a newly-expanded auxinfo_t struct. This gives gives the micro-kernels access to the pack_t schema values embedded in the control trees, which determine the precise format into which the matrix elements are packed. - Updated a call to bli_packm_init_pack() in src/test_libblis.c to remove densify argument. Meant to include this in commit c472993b. commit cc8d2b82775cca3c2d51bf427f4e77c8024a6d15 Author: Field G. Van Zee Date: Tue Sep 9 13:48:22 2014 -0500 Updated old test drivers in 'test'. commit c472993bbccb69e9ffc409c79b742426c8ad2ad4 Author: Field G. Van Zee Date: Tue Sep 9 13:42:04 2014 -0500 Removed densify argument to packm_cntl_obj_create(). Details: - Removed the "densify" bool_t argument to bli_packm_cntl_obj_create(). This argument was inserted very early in BLIS's development, when it was anticipated that the developer may sometimes wish to pack a Hermitian, symmetric, or triangular matrix without making it dense. But as it turns out, if we are packing a matrix, we always want to make it dense in some way or another due to the fact that the micro- kernel only multiplies dense micro-panels. Thus, unless/until there is a real need for the feature, it seems reasonable to remove it from the packm_cntl API. commit 5c43ee387146cd76dc59b730dac6683a8446b834 Author: Field G. Van Zee Date: Mon Sep 8 15:19:29 2014 -0500 Moved trmm4m/3m_cntl files to 'old' directory. Details: - Meant to include this in previous commit. commit 7b2f469d5465ed73b1ca88124bc9a1987388aa27 Author: Field G. Van Zee Date: Mon Sep 8 14:49:50 2014 -0500 Retired trmm_t control tree definitions, usage. Details: - Replaced all trmm_t control tree instances and usage with that of gemm_t. This change is similar to the recent retirement of the herk_t control tree. - Tweaked packm blocked variants so that the triangular code does NOT assume that k is a multiple of MR (when A is triangular) or NR (when B is triangular). This means that bottom-right micro-panels packed for trmm will have different zero-padding when k is not already a multiple of the relevant register blocksize. While this creates a seemingly arbitrary and unnecessary distinction between trmm and trsm packing, it actually allows trmm to be handled with one control tree, instead of one for left and one for right side cases. Furthermore, since only one tree is required, it can now be handled by the gemm tree, and thus the trmm control tree definitions can be disposed of entirely. - Tweaked trmm macro-kernels so that they do NOT inflate k up to a multiple of MR (when A is triangular) or NR (when B is triangular). - Misc. tweaks and cleanups to bli_packm_struc_cxk_4m.c and _3m.c, some of which are to facilitate above-mentioned changes whereby k is no longer required to be a multiple of register blocksize when packing triangular micro-panels. - Adjusted trmm3 according to above changes. - Retired trmm_t control tree creation/initialization functions. commit 576e9e9255a79dba9cd3c804267f51e0b4aa6e8a Author: Field G. Van Zee Date: Sun Sep 7 16:12:52 2014 -0500 Retired herk_t control tree definitions, usage. Details: - Replaced all herk_t control tree instances and usage with that of gemm_t, since the two types presently have the same fields. This means that herk, her2k, syrk, and syr2k can simply use the gemm control tree as-is, just as hemm and symm have been doing for some time now. - Retired herk_t control tree creation/initialization functions. - Retired many _target.c and .h files into 'old' directories. commit b2fed052c9a23d858ef0afbe220b342bce9aa7f7 Author: Field G. Van Zee Date: Wed Sep 3 17:07:25 2014 -0500 Minor code cleanup to bli_packm_struc_cxk*.c Details: - Realized that we don't need to track rs_p11 and cs_p11 for Hermitian/symmetric case of bli_packm_struc_cxk*(). They are always equal to rs_p and cs_p. commit 023ce770966b3b5a98bba729c5af1f45e15ebb97 Author: Field G. Van Zee Date: Wed Sep 3 10:47:53 2014 -0500 Minor update to packm_cxk kernels. Details: - Changed m and n dimension parameter names to panel_dim and panel_len, respectively, in packm_cxk, packm_cxk_3m, packm_cxk_4m kernel wrapper functions. This makes the code a little easier to read since "m" and "n" have connotations that are not applicable here. - Comment updates. commit 189def3667d9218adbeec45e2801fd074341a679 Author: Field G. Van Zee Date: Mon Sep 1 16:23:17 2014 -0500 Retired portions of bli_kernel_3m/4m_macro_defs.h. Details: - Removed sections of bli_kernel_[4m|3m]_macro_defs.h that defined 4m/3m-specific blocksizes after realizing that this can be done in bli_gemm[4m|3m]_cntl.c, since that is (mostly) the only place they are used. - The maximum cache values for 4m/3m are stll needed when computing mem pool dimensions in bli_mem_pool_macro_defs.h. As a workaround, "local" definitions in terms of the regular cache blocksizes are now in place. - Similarly, the register blocksizes for 4m/3m are still needed in bli_kernel_post_macro_defs.h. As a workaround, "local" definitions in terms of the regular register blocksizes are now in place. commit af521ee6f2a77d61c98b833e85c09969987bc00d Author: Field G. Van Zee Date: Mon Sep 1 14:06:46 2014 -0500 Changed semantics of blocksize extensions. Details: - Changed semantics of cache and register blocksize extensions so that the extended values are tracked, rather than just the marginal extensions. - BLIS_EXTEND_[MKN]C_? has been renamed BLIS_MAXIMUM_[MKN]C_?. - BLIS_EXTEND_[MKN]R_? has been renamed BLIS_PACKDIM_[MKN]R_?. - bli_blksz_ext_*() APIs have been renamed to bli_blksz_max_*(). Note that these "max" query routines grab the maximum value for cache blocksizes and the packdim value for register blocksizes. - bli_info_*() API has been updated accordingly. - All configurations have been updated accordingly. commit 07f23aefd52f5ba4960dbd46e59b180a2136b8e9 Author: Field G. Van Zee Date: Sun Aug 31 11:58:50 2014 -0500 Pass pack schema into packm_struc_cxk*(). Details: - Changed the interface to the packm_struc_cxk*() kernels to include the pack_t schema. This allows the implementation to more easily determine how the micro-panel is stored (row-stored column panel or column-stored row panel). - Updated packm blocked variants to pass in the schema. - Updated packm_ker_t function pointer definition accordingly. commit f032ba9b1186cb02184574d339565f53d733aa42 Author: Field G. Van Zee Date: Sat Aug 30 16:21:20 2014 -0500 Reorganized packm implementation. Details: - Reorganized packm variants and structure-aware kernels so that all routines for a given pack format (4m, 3m, regular) reside in a single file. - Renamed _blk_var4 to _blk_var2 and generalized so that it will work for both 4m and 3m, and adjusted 4m/3m _cntl_init() functions accordingly. - Added a new packm_ker_t function pointer type to bli_kernel_type_defs.h to facilitate function pointer typecasting in the datatype-specific packm_blk_var2() functions. - Deprecated _blk_var3. - Fixed a bug in the triangular micro-panel packing facility that affected trmm and trmm3 with unit diagonals. commit c6793cecb70788bdf2c76ab8102504ea97be9d2a Author: Field G. Van Zee Date: Thu Aug 28 17:14:48 2014 -0500 Reorganized #includes for scalar macro headers. Details: - Reordered the #include statements in bli_scalar_macro_defs.h so that conventional, ri-, and ri3-based macros are grouped together. - Renamed bli_eqri.h (and macros within) to end with 'ris' suffix. commit b4da8907284345be4374f87a88679c4886ab866e Author: Field G. Van Zee Date: Thu Aug 28 14:10:32 2014 -0500 Whitespace, comments updates on packm_blk_var?.c. commit 46e46a1d83da586c3dd9fd7a01eb16067abbaee1 Author: Field G. Van Zee Date: Thu Aug 28 12:05:45 2014 -0500 Minor updates to packm blocked, cxk_3m/4m code. Details: - Added 'const' qualifier to inlined packing code that handles micro-panel packing that is too large for an existing packm ukernel. - Comment updates. commit 908dc688b5979995eaacb3aa937f241551a8df00 Author: Field G. Van Zee Date: Thu Aug 28 11:55:12 2014 -0500 Pass pack schema into blocked packm routines. Details: - Rather than passing the packm blocked routines a boolean value that represents whether the matrix is being packed to row or column storage, we now pass in the pack schema itself. commit a0ff6066e06075ab5f92b19247b39b92ed15f1bf Merge: c4c99c48 d40b32bc Author: Field G. Van Zee Date: Sun Aug 24 15:56:21 2014 -0500 Merge branch 'master' of github.com:flame/blis commit c4c99c4813bf9817592a7899c5d33412fe22313f Author: Field G. Van Zee Date: Sun Aug 24 15:52:22 2014 -0500 Renamed packm scalar from beta to kappa. Details: - The packm implementation (i.e. sources files in frame/1m/packm and frame/1m/packm/ukernels), interchangeably used the names "beta" and "kappa" to refer to the optional scalar to be applied during packing. This commit renames all uses of "beta" to be "kappa", since "beta" sometimes evokes the scalar specifically on the output matrix of a level-2 or level-3 operation. commit d40b32bc24ffbae24123e054307b3138969bb095 Merge: 9331f794 6c25c379 Author: Field G. Van Zee Date: Sun Aug 24 13:46:36 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 6c25c379fadb50834146e1614f7b80c093c2aad0 Author: Field G. Van Zee Date: Sun Aug 24 13:44:10 2014 -0500 Consolidated unpackm ukernels into single file. Details: - Reorganized unpackm ukernels into a single file, bli_unpackm_ref_cxk.c, in a manner similar to what was done for packm ukernels in commit 4cc2b46. commit 9331f79443223fe267676ee54c439e1ed320380c Merge: 7fc48a7d 670b6392 Author: Field G. Van Zee Date: Sun Aug 24 10:54:21 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 670b63926a7f4fc694abc5b1582ef8a4f367f5a8 Author: Field G. Van Zee Date: Sun Aug 24 10:46:27 2014 -0500 Added whitespace to bli_obj_scalar_ routine calls. Details: - Added extra spaces to align arguments of bli_obj_scalar_init_detached_copy_of(). This misalignment was due to the fact that the function was previously named bli_obj_init_scalar_copy_of() and the name change, performed in b444489f, was done via recursive sed commands which left subsequent lines untouched. commit 7fc48a7d920e07fd8e9528ab2565123f8f4e67f9 Author: Field G. Van Zee Date: Sat Aug 23 16:50:58 2014 -0500 Combined 4m/3m bits into an expanded bitfield. Details: - Combined the 4m/3m bits into an expanded bitfield, which will encode the packing "format" of the micro-panels. This will allow for more easily and compactly encoding additional formats. - Other minor comment/whitespace updates to bli_type_defs.h. - Updated bli_obj_macro_defs.h and bli_param_macro_defs.h to use the new format bitfield. - Comment update to bli_kernel_post_macro_defs.h. - Whitespace changes to bli_kernel_3m_macro_defs.h, _4m_macro_defs.h. commit ef0143cc1417e4815e4cafd5a464cc83fe7a1e86 Author: Field G. Van Zee Date: Sat Aug 23 14:02:27 2014 -0500 Renamed _ri, _ri3 packm ukernels to _4m, _3m. Details: - Renamed packm ukernels, _cxk dispatcher, and structure-aware _cxk helper functions to use _4m and _3m instead of _ri and _ri3 suffixes. - Updated names of cpp macros that correspond to packm ukernels. commit b0ccac116158b5ed3316d34798748ba0c6d78672 Author: Field G. Van Zee Date: Thu Aug 21 19:21:52 2014 -0500 Cleaned up front-end layering for 4m/3m. Details: - Added an extra layer to level-3 front-ends (examples: bli_gemm_entry() and bli_gemm4m_entry()) to hide the control trees from the code that decides whether to execute native or 4m-based implementations. The layering was also applied to 3m. - Branch to 4m code based on the return value of bli_4m_is_enabled(), rather than the cpp macros BLIS_ENABLE_?COMPLEX_VIA_4M. This lays the groundwork for users to be able to change at runtime which implementation is called by the main front-ends (e.g. bli_gemm()). - Retired some experimental gemm code that hadn't been touched in months. commit bedec95451cabfa7a8906b51018a5e0572998a5e Author: Field G. Van Zee Date: Thu Aug 21 18:25:48 2014 -0500 Added bli_4m API for querying 4m enabled state. Details: - Added bli_4m.c (and header), which defines a simple API that can be used to query, enable, and disable 4m-based complex support in BLIS. The macros BLIS_ENABLE_?COMPLEX_VIA_4M are now used to initialize the variable that determines the state (enabled or disabled). - Changed bli_info*() API so that all cache and register blocksize- related query routines return the blksz_t objects' values as they exist at runtime, rather than return the values as determined by the configuration system (e.g. bli_kernel.h, or defaults for those values not specified). This sets the foundation for being able to change those blocksizes at runtime. commit b541b667cabfa6d41b50ad1e49209651ee6812cc Merge: 699a8151 dd61307f Author: Tyler Smith Date: Wed Aug 20 14:44:51 2014 -0500 Merge branch 'master' of http://github.com/flame/blis Conflicts: frame/3/trsm/bli_trsm_blk_var2b.c frame/3/trsm/bli_trsm_blk_var2f.c commit 699a8151ca3d5021e834a1784ef45dcc3a3d17cd Author: Tyler Smith Date: Wed Aug 20 14:43:17 2014 -0500 Some improvements to trsm parallelism commit dd61307f55bb6bc762fe0ef0446479d6c0536723 Author: Field G. Van Zee Date: Wed Aug 20 09:52:16 2014 -0500 Minor update to sandybridge MC_S, KC_S. Details: - Changed sandybridge MC and KC for single-precision real to 128 and 384, respectively. - Updated comments in template configuration's gemm micro-kernel file to document the new "contiguous row preference" macro. commit d0eec4bddd740ce360d0f655362c551287cf925b Author: Field G. Van Zee Date: Tue Aug 19 15:49:19 2014 -0500 Added optional row preference to ukernel config. Details: - Added the ability for the kernel developer to indicate the gemm micro- kernel as having a preference for accessing the micro-tile of C via contiguous rows (as opposed to contiguous columns). This property may be encoded in bli_kernel.h as BLIS_?GEMM_UKERNEL_PREFERS_CONTIG_ROWS, which may be defined or left undefined. Leaving it undefined leads to the default assumption of column preference. - Changed conditionals in frame/3/*/*_front.c that induce transposition of the operation so that the transposition is induced only if there is disagreement between the storage of C and the preference of the micro-kernel. Previously, the only conditional that needed to be met was that C was row-stored, which is to say that we assumed the micro- kernel preferred column-contiguous access on C. - Added a "prefers_contig_rows" property to func_t objects, and updated calls to bli_func_obj_create() in _cntl.c files in order to support the above changes. - Removed the row-storage optimization from bli_trsm_front.c because it is actually ineffective. This is because the right-side case of trsm flips the A and B micro-panel operands (since BLIS only requires left-side gemmtrsm/trsm kernels), meaning any transposition done at the high level is then undone at the low level. - Tweaked trmm, trmm3 _front.c files to eliminate a possible redundant invocation of the bli_obj_swap() macro. commit 4cc2b464f29cafbfef9295b073b857fe0752f710 Author: Field G. Van Zee Date: Fri Aug 15 11:49:15 2014 -0500 Reorganized packm ukernels. Details: - Previously, packm micro-kernels were organized by the implied register blocksize (panel dimension) assumed by the kernel, meaning conventional, ri, and ri3 variations of some micro-kernel size were housed in the same file. This commit reorganizes the micro-kernels so that all sizes reside in the same file for each format type (conventional, ri, and ri3). commit fcc10054a11b6fc3976986f57feccf741596cbf6 Author: Field G. Van Zee Date: Wed Aug 13 12:32:06 2014 -0500 Tweaks to gemm4m, gemm3m virtual ukernels. Details: - Fixed a potential, but as-yet unobserved bug in gemm3m that would allow undesirable inf/NaN propogation, since C was being scaled by beta even if it was equal to zero. - In gemm3m micro-kernel, we now avoid copying C to the temporary micro-tile if beta is zero. - Rearranged computation in gemm4m so that the temporary C micro-tile is accessed less, and C is accessed only after the micro-kernel calls. This improves performance marginally in most situations. - Comment updates to both gemm4m and gemm3m micro-kernels. commit cdcbacc2fa871317c8e7ef961ecc6d70ab22dc34 Author: Field G. Van Zee Date: Tue Aug 12 12:45:38 2014 -0500 Removed redundant redef of packm ukr prototypes. Details: - Removed redundant macro code that redefined packm ukernel prototypes when the previous macro was already sufficient. This helps de-clutter the packm ukernel prototyping headers a little bit. commit 82dac98d9032ccb598068a55ddf23d7898491e9e Author: Field G. Van Zee Date: Tue Aug 12 12:36:25 2014 -0500 Relocated packm ukernel #includes. Details: - Consolidated the #include statements for packm ukernel headers from bli_packm_cxk.h, bli_packm_cxk_ri.h, and bli_packm_cxk_ri3.h to bli_packm.h. - Comment/whitespace updates to bli_packm_blk_var3.c, _var4.c. commit 7f77856e25aad5fc6f172ed3e57b6351804e31a4 Author: Field G. Van Zee Date: Tue Aug 12 12:20:15 2014 -0500 Removed unused 4m/3m-related packm macro defs. Details: - Removed unused and unneeded s- and d-flavored macro definitions for packm ukernels related to the complex 4m and 3m methods, as implemented in BLIS. commit bc1d86b2d4d436b1dfba2d0098501aaca9cbb8b5 Author: Field G. Van Zee Date: Thu Aug 7 19:01:20 2014 -0500 Sandy Bridge configuration, micro-kernel update. Details: - Minor updates to bli_config and bli_kernel.h for sandybridge configuration. - Renamed existing AVX intrinsic-based micro-kernel file to bli_gemm_int_d8x4.c. - Added new file, bli_gemm_asm_d8x4.c, which provides assembly-based gemm micro-kernels for single- and double-precision real. commit 98ec95877a95242e159b2bf0c879115a59e4c6e2 Author: Field G. Van Zee Date: Thu Aug 7 18:28:32 2014 -0500 Corrected comment for _obj_is_[row|col]_stored(). Details: - Fixed a mistake in the comments introduced in the previous commit for bli_obj_is_row_stored() and bli_obj_is_col_stored(). commit 43d5e419e1b424d2143817103dbee8ead797e8aa Author: Field G. Van Zee Date: Thu Aug 7 18:20:40 2014 -0500 Reverted _obj_is_[row|col]_stored() macros. Details: - Rolled back recent changes to bli_obj_is_row_stored() and bli_obj_is_col_stored() so that those macros now only inspect the strides (row or column). It turns out that the more sophisticated definitions introduced in a51e32e are not necessary, because these "obj" macros are virtually never used on packed matrices, and when they are, they can use bli_obj_is_[row|col}_packed() macros, which inspect the info bitfield. commit 45692e3ad4b7e1d05ac4302398df4efce04b4284 Author: Field G. Van Zee Date: Thu Aug 7 13:21:15 2014 -0500 Reverted some accidental changes. Details: - Reverted some changes that were unintentionally included in the previous commit (9526ce98). Thanks to Tony Kelman for pointing this out. (Note: a few select changes were not reverted.) commit 9526ce98812be908bc4915f2849b657fb6ce1b49 Author: Field G. Van Zee Date: Wed Aug 6 14:13:46 2014 -0500 Updated copyright headers of emscripten configuration files. commit 30833ed71d56f231ddba21e632bcbbc90b12a97c Author: Field G. Van Zee Date: Wed Aug 6 12:12:03 2014 -0500 Minor edits to configurations' make_defs.mk files. Details: - Redefined CFLAGS, CFLAGS_NOOPT, and CFLAGS_KERNELS so that CFLAGS_NOOPT is defined first and then the other two are defined in terms of CFLAGS_NOOPT. This textually cleans up the definitions and makes them a little easier to read. commit 9d61afeae2ba70fe1df07e7546f6954ea83aed12 Author: Field G. Van Zee Date: Mon Aug 4 16:01:59 2014 -0500 CHANGELOG update (0.1.5) commit bde56d0ecfd0ec20330fac290b91a6dca0cf94e9 Author: Field G. Van Zee Date: Mon Aug 4 16:01:58 2014 -0500 Version file update (0.1.5) commit 4c6ceea4be35d089630986eb5b959b9e97214077 Author: Field G. Van Zee Date: Mon Aug 4 15:49:59 2014 -0500 Added CBLAS compatibility layer. Details: - Added a new section in bli_config.h files of all configurations for enabling CBLAS support. (Currently, the default is for the CBLAS layer to be disabled.) - Added a directory, frame/compat/cblas, to house CBLAS source code. A subdirectory 'f77_sub' holds subroutine wrappers corresponding to subroutines found in CBLAS that allow calling some BLAS routines with the return value passed as the last argument rather than as an actual (function) return value. This was probably intended to allow CBLAS to avoid the whole f2c debacle altogether. However, since BLIS does not assume the presence of a Fortran compiler, we had to provide similar routines in C. - A script, integrate-cblas-tarball.sh, is included to streamline the integration of future revisions of the CBLAS source code. - The current tarball, cblas.tgz, that was used with the above script to generate the present set of CBLAS source code is also included. - Updated blis.h to include necessary CBLAS-related headers. commit caab62dac0fb0bd0d674118f409c81680db94d29 Merge: 383631b5 db97ce97 Author: Field G. Van Zee Date: Sun Aug 3 14:36:18 2014 -0500 Merge pull request #19 from kevinoid/fix-install-perms-error Fix permissions error installing to non-owned directory commit db97ce979b88c051922c2f946ce52d523c7a12c6 Author: Kevin Locke Date: Sun Aug 3 12:48:04 2014 -0600 Fix permissions error installing to non-owned directory When installing to a directory which is not owned by the installing user, even when the user has write permission for the directory, the installation can fail with an error similar to the following: Installing libblis-0.1.4-7-sandybridge.a into /usr/local/lib/ install: cannot change permissions of ‘/usr/local/lib’: Operation not permitted Makefile:658: recipe for target '/usr/local/lib/libblis-0.1.4-7-sandybridge.a' failed make: *** [/usr/local/lib/libblis-0.1.4-7-sandybridge.a] Error 1 In the example case, the error occurred because the user attempted to install to /usr/local and /usr/local/lib is owned by root with mode 2755 which the Makefile unsuccessfully attempted to change to 0755. Given that installing to /usr/local is likely to be quite common and the ownership/permissions are the default for Debian and Debian-derived Linux distributions (perhaps others as well), this commit attempts to support that use case by using mkdir rather than install to create the directory (which is the same approach as Automake). Signed-off-by: Kevin Locke commit 383631b514c3d42b724640f57644eea276cc418c Author: Field G. Van Zee Date: Thu Jul 31 14:51:48 2014 -0500 Redefined bit field macros with bitshift operator. Details: - Redefined many of the macros that define bit fields and bit values in the obj_t info field using the bitshift operator (<<). This makes it easier to reorder bit fields, or expand existing bit fields, or add new fields. The bitshifting should be evaluated by the compiler at compile-time. commit 137143345dc93cc9a83da5ba88b25bac7502de86 Author: Field G. Van Zee Date: Thu Jul 31 12:12:45 2014 -0500 Reimplemented unit blocksize fix in prev commit. Details: - Instead of inferring the storage format of the micro-panels from within the packm variants, we now pass in a bool_t value that denotes whether the packed matrix contains row-stored column panels or column-stored row panels. This value can then be tested more easily inside the main packm variant loop. - Renumbered pack_t schema values in bli_type_defs.h so that there are now five bits, each with different meaning: - 4: packed or not packed? - 3: packed for 3m? - 2: packed for 4m? - 1: packed to panels? - 0: stored by rows or columns? - Added new macros that test for status of above bits in schema bit subfield, and renamed some existing macros related to 4m/3m. commit a51e32ec061941cd10119ea80115c82a40b1673f Author: Field G. Van Zee Date: Wed Jul 30 10:41:48 2014 -0500 Fixed unit register blocksize brokenness. Details: - Fixed a breakdown in BLIS's ability to differentiate between row-stored and column-stored micro-panels when MR or NR is unit. When either register blocksize (or both) is equal to one, inspecting the strides of the affected packed micro-panel is no longer sufficient to determine whether the micro-panel is a row-stored column panel or a column-stored row panel (because both strides are unit). At that point, dimension information is necessary when invoking the bli_is_row_stored_f() and bli_is_col_stored_f() macros (and their "obj" counterparts). Thanks to Ilya Polkovnichenko for reporting this bug. - Added panel dimensions (m and n) to obj_t, which are set in packm_init() and then passed into the blocked variants to support the aforementioned update. commit c2732272f0ac680a0ad19fa9db5d587398a1479a Author: Field G. Van Zee Date: Tue Jul 29 16:37:18 2014 -0500 Removed old/unused packm variants. commit b97fa9a5a70fe0123e5eebd999b947461d38445f Author: Field G. Van Zee Date: Sun Jul 27 18:54:09 2014 -0500 Minor usage update to build/bump-version.sh. commit b18ba5f62d98629cdd519ff4c96fc67ec1a62fb9 Author: Field G. Van Zee Date: Sun Jul 27 18:52:05 2014 -0500 Added missing 'bla_' prefix to r_imag(), d_imag(). Details: - Added "bla_" to f2c functions r_imag() and d_imag(). Thanks to Murtaza Ali for pointing the mis-named functions. commit af7a8e6c042cade452130a6729377f1a3ef4e19e Author: Field G. Van Zee Date: Sun Jul 27 18:20:13 2014 -0500 CHANGELOG update (0.1.4) commit a7537071b152ecff671f8716595d37dc09e4fd51 Author: Field G. Van Zee Date: Sun Jul 27 18:20:12 2014 -0500 Version file update (0.1.4) commit acff74041bf02c7b9fdfa24b507bca782a4c5fce Merge: cdb9413e 47b243ef Author: Tyler Smith Date: Wed Jul 23 15:07:30 2014 -0500 Merge branch 'master' of https://github.com/flame/blis commit cdb9413e140f8a198666250ec88fa34b5425a9c3 Author: Tyler Smith Date: Wed Jul 23 15:05:15 2014 -0500 Enabled threading for a couple more loops in TRSM JC loop is now enabled for the left-sided case IC loop is now enabled for the right-sided case commit 47b243ef08f4101de3d936f2373343e67eaa4dd5 Author: Field G. Van Zee Date: Wed Jul 23 13:41:13 2014 -0500 Call setid for early return from herk/her2k. Details: - Added setid call (to zero imaginary parts of diagonal elements) to early return branches of herk_front() and her2k_front() for cases where alpha is zero. Thanks to Murtaza Ali for suggesting this fix. - Comment update. commit 3e7b0db5b0e24f5fd66c60bacabc019885ddbec5 Merge: 2f8a357d ed3e33d5 Author: Tyler Smith Date: Wed Jul 23 13:40:44 2014 -0500 Merge branch 'master' of https://github.com/flame/blis commit 2f8a357de5fb55163a969d888cf059f24b78125c Author: Tyler Smith Date: Wed Jul 23 13:40:12 2014 -0500 Some TRSM threading fixes/additions commit ed3e33d548047be3283ff41268fdf716563bc542 Author: Field G. Van Zee Date: Tue Jul 22 14:40:43 2014 -0500 Tweaked behavior of herk, her2k for BLAS compat. Details: - Updated herk_front() and her2k_front() to explicitly set the imaginary components of the diagonal entries of C to zero after the computation is complete. This is needed in case downstream applications read the full diagonal entries (i.e., including imaginary part), which could, in the absence of this modification, accumulate numerical error from subsequent rank-k/rank-2k updates. - Updated BLAS compatibility wrappers for herk and her2k to return early if: n == 0 || ( ( alpha == 0 || k == 0 ) && beta == 1 ) This also results in the imaginary components of diagonal entries NOT being set to zero (see above), which is consistent with BLAS. - Updated mkherm to use setid instead of an inlined loop over the diagonal. commit ea59a5c93cde1467a3715abc53dda4aecf961873 Author: Field G. Van Zee Date: Tue Jul 22 14:36:02 2014 -0500 Added new level-1d operation: setid. Details: - Defined a new level-1d operation, setid, which sets the imaginary elements of an object's diagonal to a single scalar. This can be useful, for example, when trying to make the diagonal of a Hermitian matrix real-valued. commit 8965a965931318619ceaebd7c32edccf3022d0c7 Merge: 1785efb5 5b73e80b Author: Field G. Van Zee Date: Tue Jul 22 14:34:32 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 1785efb5420bc7b9c850a068cb5d99837071e877 Author: Field G. Van Zee Date: Tue Jul 22 14:33:01 2014 -0500 Minor improvements to invertd and setd. Details: - Added missing call to invertd_check() from front-end. - Changed setd front-end call of scald_check() to setd_check(). commit 5b73e80b71c054c1945a06aff044ef629bc1a9a0 Merge: a41e68e0 20690fe3 Author: Field G. Van Zee Date: Fri Jul 18 12:21:20 2014 -0500 Merge pull request #16 from Maratyszcza/emscripten Emscripten port commit a41e68e09e73b999fab0bb430a43dccfc63aab45 Author: Field G. Van Zee Date: Thu Jul 17 13:25:56 2014 -0500 Reimplemented BLIS initialization/finalization. Details: - Rewrote bli_init() and bli_finalize() with OpenMP critical sections for thread-safety. Also added lots of explanatory comments. - Renamed bli_init_safe() and bli_finalize_safe() with the _auto() suffix, and reimplemented for simplicity. Updated all invocations in BLAS compatibility layer to use _auto() suffix. commit 36358948ea75074bda32a9f8c008f835b87d21db Author: Field G. Van Zee Date: Thu Jul 17 10:58:10 2014 -0500 Retired frame/3/gemm/other directory. Details: - Removed frame/3/gemm/other directory, which contained some outdated and/or experimental variants. commit c73261f17edf589e76bdbe297702a1fbbd69275f Author: Field G. Van Zee Date: Mon Jul 14 16:23:51 2014 -0500 More minor cleanups post-copyright update. commit 2a09d24463d358be6243b24f112fad057c2aefe0 Author: Field G. Van Zee Date: Mon Jul 14 16:17:09 2014 -0500 Reverted power7 symlinks destroyed by sed script. Details: - Reverted two symlinks, in kernels/power7/3/test, back to being symlinks after recursive-sed.sh mistakenly replaced them with copies of the actual files to which they referred. Meant to include this in previous commit. commit 7ed415824d3b2e78541b6f64e404ca5347c06d3d Author: Field G. Van Zee Date: Mon Jul 14 16:14:33 2014 -0500 Updated copyright headers (continued). Details: - Inserted "at Austin" into third clause of license declarations. Meant to include this change in previous commit. commit 5c2c6c85616834ff2716ece083118201d9df6dde Author: Field G. Van Zee Date: Mon Jul 14 16:05:03 2014 -0500 Updated copyright headers to contain "at Austin". Details: - Updated copyright headers to include "at Austin" in the name of the University of Texas. - Updated the copyright years of a few headers to 2014 (from 2011 and 2012). commit fcec68cda3f6e90ae055e7304e6674c1c5c8d010 Merge: 94c0df79 4a20ed1a Author: Field G. Van Zee Date: Mon Jul 14 11:35:34 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 94c0df797eda377931f29a41ba6a89c0ed58daca Author: Field G. Van Zee Date: Mon Jul 14 11:24:36 2014 -0500 Changed order of zero dim / error checking. Details: - Updated level-2 and level-3 internal back-ends so that the operation's _check() function is called BEFORE any attempt to return early due to the presence of zero dimensions. This ordering makes more sense because (for example) object dimensions should match even if one of them is zero. Previously, a dimension mismatch could result in an early return with no error message. - Updated bli_check_object_buffer() so that NULL buffers result in an error only if the object is dimensionally non-empty (i.e., only if both of the object's dimensions are non-zero). This allows BLIS operations to be performed on dimensionally empty objects (i.e., where at least one dimension is zero). - Updated the error message associated with bli_check_object_buffer() to mention the newly relaxed constraint mentioned above, vis-a-vis non-zero dimensions. commit 20690fe3018ce17c8df61ce0bffecaa7911dc3a5 Author: Marat Dukhan Date: Sun Jul 13 22:50:56 2014 -0700 Emscripten port commit 4a20ed1a3f5e9e5232df30aa0e568e6c00c56ce1 Merge: 6a515e98 8ccdfaef Author: Field G. Van Zee Date: Sun Jul 13 17:45:01 2014 -0500 Merge pull request #14 from Maratyszcza/master Support "make test" for PNaCl configuration commit 6a515e988f2ae1628258a6dec2c0e9cf2d04790f Author: Field G. Van Zee Date: Sun Jul 13 17:38:33 2014 -0500 Implemented dsdot() and sdsdot() in compat layer. Details: - Replaced "not yet implemented" error messages in dsdot() and sdsdot() with actual implementations. (These routines are so rarely used that this log message will probably lead to some people learning of their existence for the first time.) commit 255668ddd1004552c6cc65035ec6486671ce99bb Author: Field G. Van Zee Date: Sun Jul 13 17:30:44 2014 -0500 Inserted gemv beta-scaling bug into compat layer. Details: - BLAS has a peculiar bug (or feature) whereby calling gemv on a vector y of non-zero length and a vector x of zero length results in no action. Given that the operation is y := beta*y + A*x, many (most?) individuals would expect vector y to still be scaled by beta. BLIS, when called natively, handles these cases intuitively (with beta scaling). Unfortunately, many BLAS test suites actually check for the way this situation is handled. Therefore, we have decided to implement this "bug" in the compatibility layer so as to provide "bug-for-bug" compatibility with BLAS. commit 570a154581bdb353fa13a219c7cb3c81d3dceffd Author: Field G. Van Zee Date: Sat Jul 12 17:51:05 2014 -0500 Comment/formatting updates to build scripts. Details: - Minor updates to comments and formatting in bump-version.sh and update-version-file.sh scripts. commit 26cd81990631ff799791629206e068126ff9e3a1 Author: Field G. Van Zee Date: Thu Jul 10 13:16:07 2014 -0500 Added bli_info_*() query functions. Details: - Added a new API family, bli_info_*(), which can be used to query information about how BLIS was configured. Most of these values are returned as gint_t, with the exception of the version string which is char*. - Changed how the testsuite driver queries information about how BLIS was configured (from using macro constants directly to using the new bli_info API). - Removed bli_version.c and its header file. - Added STRINGIFY_INT() macro to bli_macro_defs.h - Renamed info_t type in bli_type_defs.h to objbits_t (not because of an actual naming conflict, but because the name 'info_t' would now be somewhat misleading in the presence of the new bli_info API, as the two are unrelated). commit 970b43141697d8c31a033f59513bb59d7cc78ab0 Author: Field G. Van Zee Date: Thu Jul 10 09:30:00 2014 -0500 Minor bugfixes to BLAS compatibility layer. Details: - Changed bla_amax.c so that i?amax() routines now correctly return 0 if ( n < 1 || incx <= 0 ). - Changed bla_rotg.c and bla_rotmg.c to use bli_fabs() macro instead of f2c's abs() macro for float and double cases. - Thanks to Murtaza Ali for suggesting the two fixes above. - Updated label of fnormv to normfv in testsuite/input.operations. commit 8ccdfaef4c42ad8957af8607a1a9ee29b9277d4b Author: Marat Dukhan Date: Tue Jul 8 23:14:36 2014 -0700 Replicated logic from testsuite/Makefile in top-level Makefile to support make test commit caa6507ff3724c80d60987f309b8bbc5b50a9841 Author: Field G. Van Zee Date: Tue Jul 8 10:25:27 2014 -0500 Minor cleanup to standalone test drivers. Details: - Very minor code changes to standalone test drivers in 'test' directory. - Added *.so files to '.gitignore'. commit 6c65e9a58fe55990ebb99ec3986443e18af35338 Merge: cb12e456 daca500d Author: Field G. Van Zee Date: Tue Jul 8 10:13:49 2014 -0500 Merge branch 'master' of github.com:flame/blis commit cb12e456f94c196c093e52f02a7cbca0032fc86e Author: Field G. Van Zee Date: Tue Jul 8 10:07:46 2014 -0500 Fixed possible level-3 inf/NaN issue when beta=0. Details: - Redefined xpbys_mxn and xpbys_mxn_u/_l macros to employ a copy (instead of scaling by beta) when beta is zero. This will stamp out any possible infs or NaNs in the output matrix, if it happens to be uninitialized. Thanks to Tony Kelman for isolating this bug. commit daca500db5e2448ba0da8047b75eb0f88d9f40e3 Merge: ab3bc915 47023502 Author: Tyler Smith Date: Thu Jul 3 12:52:52 2014 -0500 Merge branch 'master' of http://github.com/flame/blis commit 4702350278af31f662b458127777dd4d85a3192f Author: Field G. Van Zee Date: Thu Jul 3 11:48:23 2014 -0500 Defined _ukernel_void() wrappers to micro-kernels. Details: - Added wrappers for micro-kernels so that users may invoke the micro-kernels without knowing what the function names actually are. This is useful when an application wishes to call the micro-kernel from a shared library instance of BLIS, where the application may not necessarily have the luxury of grabbing the micro-kernel name(s) from C preprocessor macros at compile-time. Also, since the wrappers use void* pointers, one's environment does not need to be aware of some BLIS types such as scomplex and dcomplex. These wrappers now join the level-1 and level-1f kernel wrappers, which pre-dated this commit. - Removed the wrapper definitions and prototypes from the micro-kernel test suite modules, and replaced calls to them with calls to the new wrappers mentioned above. commit ab3bc9153b914fbaf259e15b66c91d628e7c8661 Author: Tyler Smith Date: Thu Jul 3 11:19:43 2014 -0500 Fixed a bug for TRSM when BLIS_ENABLE_MULTITHREADING is not set but the multithreading environment variables are turned on commit b8134b720b985783ee6a582a3eb5d6c51f00d051 Author: Tyler Smith Date: Wed Jul 2 16:02:39 2014 -0500 Quick and dirty multithreading for TRSM Should work fine for small number of threads (up to 8 or maybe even 16). However, performance is yet untested. This parallelizes the "JR" loop for the left sided cases and the "IR" loop for the right sided cases. Future work is to parallelize the outer loops as well. commit e8ef69692831db07ddbe9485a5e504ac3f03e496 Author: Field G. Van Zee Date: Wed Jul 2 14:59:27 2014 -0500 Added shared library support to build system. Details: - Modified top-level Makefile to support building shared (dynamic) libraries. - Updated most configurations' make_defs.mk files to include necessary compiler/linker flags needed by top-level Makefile. - Note that by default, all configurations presently do NOT build shared libraries. To enable, one must change the value of BLIS_ENABLE_DYNAMIC_BUILD to 'yes'. commit b80df0f2cffb015da02e70a82b8512da9891ab67 Author: Field G. Van Zee Date: Mon Jun 23 13:52:39 2014 -0500 Added bump-version.sh script to 'build' directory. Details: - Added a bash script, bump-version.sh, to aid in incrementing the BLIS version string. commit 9ef1f1e21d083697fc730e48d7d9169c201f3da2 Author: Field G. Van Zee Date: Mon Jun 23 13:48:17 2014 -0500 CHANGELOG update (0.1.3) commit 036cc634918463b1caa0fd89c9a211f2f5639af7 Author: Field G. Van Zee Date: Mon Jun 23 13:48:17 2014 -0500 Version file update (0.1.3) commit 09d9a3bf6763932d9f571085b2cfd1b8631eccba Author: Field G. Van Zee Date: Mon Jun 23 13:43:26 2014 -0500 Reverting version file to test new version script. Details: - Changed version file contents to 0.1.2 so that I can test out a new version file bumping script. commit ebb33965981dcb2b0bdee5fc7fdf6c959420f311 Author: Field G. Van Zee Date: Mon Jun 23 11:22:50 2014 -0500 Added 'version' file. commit 2cb9a5501a3cbeb6692cf68e896087ba73b6af69 Author: Field G. Van Zee Date: Mon Jun 23 10:42:29 2014 -0500 Removed 'version' from .gitignore file. commit b40dcefc5ee31f67aa3990e2e9d2ef8ed1386a25 Merge: 7101a8ee b693b0cd Author: Field G. Van Zee Date: Mon Jun 23 10:39:05 2014 -0500 Merge pull request #11 from Maratyszcza/stable [sc]axpy kernels for PNaCl commit b693b0cddcfb41450e3c09a3ab97acb44c1ccdec Author: Marat Dukhan Date: Sun Jun 22 13:44:25 2014 -0700 [SC]AXPY kernels for PNaCl commit 7101a8eec0327d6c3a7eb36eb4b0fd45c1c6d162 Merge: ad48dca2 020a831b Author: Field G. Van Zee Date: Thu Jun 19 21:46:50 2014 -0500 Merge pull request #10 from Maratyszcza/stable Portable Native Client port commit 020a831bc5f61744cb8354886aa679b99b1285f6 Author: Marat Dukhan Date: Thu Jun 19 00:58:26 2014 -0700 Code clean-up in PNaCl port commit 491be4f91ed725522f5cc7184053857c6c376ada Author: Marat Dukhan Date: Thu Jun 19 00:45:44 2014 -0700 Optimized dot product kernels for PNaCl commit 4b8e71aab80182873a2e138eb07902b8d8fd5480 Author: Marat Dukhan Date: Thu Jun 19 00:43:25 2014 -0700 Use AR rcs flags for PNaCl target to avoid warning commit 031deb2a5c718d569bde842590a791b812f4cf1d Author: Marat Dukhan Date: Wed Jun 18 03:11:34 2014 -0700 PNaCl configuration: use pnacl-ar instead or ar (fixes build issue on Mac) commit 68a02976e3c3638f0a9821342e269a1743e3ace3 Author: Marat Dukhan Date: Wed Jun 18 03:10:25 2014 -0700 Compile pnacl configuration in GNU11 mode to avoid warning about non-standard features commit 6f8462eb0ec278b89731e73ef583386a3371d095 Author: Marat Dukhan Date: Wed Jun 18 03:08:46 2014 -0700 Fix inconsistent VERBOSE macro in Makefile commit b2ffb4de8b6872cb23537ad282e557d11dcd9c8b Author: Marat Dukhan Date: Sun Jun 15 18:41:30 2014 -0400 Reformatted PNaCl GEMM kernels commit 6de2d472d98baa215264a776f3d5291780a6a085 Author: Marat Dukhan Date: Sun Jun 15 08:44:31 2014 -0400 CGEMM and ZGEMM kernels for PNaCl commit f064711a5e6fb3852c17c7520909b09dc27665f2 Author: Marat Dukhan Date: Sun Jun 15 06:27:37 2014 -0400 SGEMM and DGEMM kernels for PNaCl commit ad48dca22913a363899f0bef45553898718eebb1 Merge: ee2b6792 7118f87e Author: Field G. Van Zee Date: Sat Jun 14 15:10:13 2014 -0500 Merge pull request #9 from tkelman/memalign_windows Use _aligned_malloc instead of posix_memalign on Windows commit 7118f87e18b4941423472afc00215c1d1f2a1fcd Author: Tony Kelman Date: Sat Jun 14 06:53:20 2014 -0700 Use _aligned_malloc instead of posix_memalign on Windows commit ee2b679281ca45fb40b2198e293bc3bc3d446632 Author: Tyler Smith Date: Fri Jun 6 12:41:55 2014 -0500 Only include omp.h if BLIS_ENABLE_OPENMP is set commit 19c05dfaac43c627f86e897c8c00f1f9440754aa Author: Field G. Van Zee Date: Thu Jun 5 10:54:16 2014 -0500 CHANGELOG update (for 0.1.2). commit 00f232f8ed1f7c41619b12ebf779ebe2c3b2d3cd Author: Tyler Smith Date: Mon Jun 2 13:40:57 2014 -0500 Added single-precision micro-kernel for Knights Corner aka MIC aka Xeon Phi commit 3fc60e491426f6248c0feae88d971e4d1f88fb95 Author: Field G. Van Zee Date: Wed May 21 11:34:42 2014 -0500 Fixed ldim alignment bug in core2 gemm ukernel. Details: - Fixed a bug in the dunnington/core2 gemm micro-kernels that resulted in a segmentation fault if a column-stored matrix's starting address was aligned, but its leading dimension was such that its second column was unaligned. Basically, the micro-kernel was assuming that aligned load instructions were safe when they actually were not. An extra condition that checks the alignment of cs_c (ie: the leading dimension in the column storage case) has now been added. Thanks to Michael Lehn for reporting this bug. commit 77a2d8dac8b242d7a202c9aabda3927ab68cf987 Merge: 8c5d6071 21fb0893 Author: Field G. Van Zee Date: Tue May 20 09:53:19 2014 -0500 Merge pull request #8 from tlrmchlsmth/master Added multithreading to most level-3 operations. commit 21fb089387ee7c87f6dc53b0f60f68b48d3ff3e8 Author: Tyler Smith Date: Mon May 19 20:38:55 2014 -0700 Reverting changes dunnington and reference configs Now they are unchanged from the main branch of BLIS commit 8a0ef0e0db5880730425926f8ba56b457a2ba764 Author: Tyler Smith Date: Fri May 16 13:44:14 2014 -0500 Fixed rounding error in bli_get_range_weighted commit 0b4b1680334528b1b60bc696537600f763198e92 Author: Tyler Smith Date: Fri May 16 12:23:37 2014 -0500 Fixed bug with disabling JC loop threading for right sided trmm commit 5c048a90d8dfa1dbde4e45fbc10ffcbdfe59d960 Author: Tyler Smith Date: Wed May 14 16:20:06 2014 -0500 Disabled parallelism for right-sided TRMM JC loop The loop has dependent iterations. commit 13a4c717ed0e273359dbaf5554cc4fa70b087d71 Author: Tyler Smith Date: Wed May 14 14:59:04 2014 -0500 Fixed bug with bli_get_range_weighted commit 45957cc7745e9bb1698408d72f53ef192e960820 Author: Tyler Smith Date: Tue May 13 17:14:46 2014 -0500 Allowed threading to be turned off No longer requires OpenMP to compile Define the following in bli_config.h in order to enable multithreading: BLIS_ENABLE_MULTITHREADING BLIS_ENABLE_OPENMP Also fixes a bug with bli_get_range_weighted commit bd1dc98ce599d74513a553fe3b37a2ebca1c3812 Author: Tyler Smith Date: Mon May 12 17:26:19 2014 -0500 Disabled multithreading of the kc loop commit 456df0372170bd7ca2c7e2d85365a69f1f04de88 Author: Tyler Smith Date: Wed Apr 30 12:28:00 2014 -0500 Replaced register blocksize hack with querying the register blocksize for determining parallelism granularity commit f4fdfe8fc573553eb36795b79cdf681270dab71b Merge: 31bb065b 8c5d6071 Author: Tyler Smith Date: Wed Apr 30 11:46:35 2014 -0500 Merge http://github.com/flame/blis commit 8c5d6071e24ba10a53669390a47287e86ff354ce Author: Field G. Van Zee Date: Tue Apr 29 12:26:12 2014 -0500 Added _check() routines for fprint[mv], rand[mv]. Details: - Added _check() routines for fprintm, fprintv, randm, and randv. - Added invocations to the above routines from their respective front-ends. commit 262cdabcc885bcf6636f4d8bb7d320f95e81d820 Author: Field G. Van Zee Date: Mon Apr 28 16:48:25 2014 -0500 Changed treatment of NULL object buffers. Details: - Relaxed the constraint in bli_obj_attach_buffer_check(), which required the buffer address being attached to be non-NULL. This is acceptable because the user was already able to create and use objects with NULL buffers (via bli_obj_create_without_buffer(), which initializes the buffer to NULL). - Inserted calls to newly defined function, bli_check_object_buffer(), into nearly all operations' _check() or _int_check() functions. This allows BLIS to abort peacefully if a computational routine is called with an object containing a NULL buffer. By contrast, under such conditions, BLAS would typically fail with a segmentation fault. - Within operation front-ends, moved the calls to _check()/_int_check() so that zero dimensions are checked first (and if found, execution returns with trivial or no computation). This resolves issue #7. Thanks to Jack Poulson for reporting this bug. commit 31bb065ba40ae0c5a614e743b8025abca012b99e Merge: 20e24430 7c619599 Author: Tyler Smith Date: Wed Apr 23 12:30:19 2014 -0500 Merge http://github.com/flame/blis commit 7c61959955c8ba78160d0ed4d1979022029d963b Author: Field G. Van Zee Date: Thu Apr 10 17:18:36 2014 -0500 Can now query register blocksizes from blk algs. Details: - Added a new field to blksz_t objects that allows one to attach a sub-object. Doing this allows us to associate a register blocksize with any given cache blocksize. That way, the register blocksize can be queried wherever the cache blocksize would normally be accessible (e.g. a blocked algorithm). - Modified bli_gemm_cntl.c (and 4m/3m variants) so that the register blocksizes are attached to the cache blocksizes after they are created. commit 58671597d3d450817b2eda576c05ed6dadd8af6d Author: Field G. Van Zee Date: Thu Apr 10 15:35:30 2014 -0500 Minor cleanups to level-2 _cntl.c files. Details: - Changed level-2 _cntl.c files so that the blocksizes for gemv are imported and used, rather than blocksizes being declared locally. - Whitespace changes to gemv_cntl.c and gemm_cntl.c files (as well as 4m/3m variants). - Removed test/old/test_blis2.c. commit 20e24430a772bc0fbaf24dec2f8c544096fd3f4e Author: Tyler Michael Smith Date: Tue Apr 8 17:50:44 2014 +0000 Some fixes for the bgq kernels commit bde697f75ec1e7f2decebee0c9bd620b4c134cd5 Author: Tyler Smith Date: Fri Apr 4 16:43:44 2014 -0500 Add -openmp to ldflags as well commit c332be8cd471eeace7b4fa4ae7443088b6a68ec3 Author: Tyler Smith Date: Fri Apr 4 16:37:50 2014 -0500 Added -openmp flag to Xeon Phi build for convenience commit e7ca9e4b4a24d585c9aec8293fc7bb79e4171ad0 Author: Tyler Smith Date: Fri Apr 4 16:31:15 2014 -0500 Used BLIS_DEFAULT_*_MR for rounding partitioning instead of BLIS_DEFAULT_*_MC commit 7b9b228c6fa4cfb70b1ebb855b009a036e85fac3 Author: Tyler Smith Date: Fri Apr 4 16:29:10 2014 -0500 Fix for tree barrier freeing bug commit 5ec93bd9a76096312d51c326ccde1e9bd0a436ab Author: Tyler Smith Date: Fri Apr 4 15:09:10 2014 -0500 Bunch of minor fixes Removed barrier after unpackm in all level3 blocked variants Now there is an implicit barrier inside unpackm that only occurs if C is packed (which is usually not the case) Moved the enabling of the tree barriers into bli_config.h Fed the default MR and NR for double precision into bli_get_range instead of the number 8 commit 575fb9b0b08f3bdb56ccde056da619d1585617c1 Author: Tyler Smith Date: Fri Apr 4 12:13:29 2014 -0500 Changed default blocking factor to default double precision MR and NR commit ab9c7880335c281432d5809fe0dec46753d22569 Author: Tyler Smith Date: Fri Apr 4 11:38:11 2014 -0500 Added faster tree barriers necessary for performance for Xeon Phi Fixed up some stuff in the thread info free functions Disabled threading for TRSM so that it actually works when threading environment variables are set commit ec58a7923cccac08632670caadf3cf6ff5dce766 Author: Tyler Smith Date: Fri Apr 4 10:22:48 2014 -0500 Freeing thread info paths. Also made herk IC and JC loops do weighted partitioning commit 2b6848b2397d6d84ca4e5f792fc51ad05e351a36 Merge: 4e3eb39a 21a0efb3 Author: Tyler Smith Date: Fri Apr 4 09:54:54 2014 -0500 Merge http://github.com/flame/blis Conflicts: kernels/bgq/1/bli_axpyv_opt_var1.c kernels/bgq/1/bli_dotv_opt_var1.c commit 4e3eb39aca4df0b9fdc003d468f368a2f2ba597d Author: Tyler Michael Smith Date: Fri Apr 4 14:50:03 2014 +0000 Some fixes to the bgq config MR and NR for double complex were wrong Default fusing factor for double precision was wrong as well commit 21a0efb33d7435139e9c43c1a4787a6bff533e26 Author: Field G. Van Zee Date: Thu Apr 3 16:38:44 2014 -0500 Fixed follow-up to issue #6. commit c318157a9bee8ea6e59be16f99f65d9271fe0d27 Author: Field G. Van Zee Date: Thu Apr 3 16:24:34 2014 -0500 Fixed issue #6 (incorrect 'restrict' usage). Details: - Fixed improper usage of restrict keyword in axpyv and dotv bgq kernels. (However, there may be other instances of similar misuse elsewhere in BLIS.) Thanks to Jeff Hammond for reporting this issue. commit b5150a1bf3bd89598e2b3aeac110eb5b44ac6c12 Author: Field G. Van Zee Date: Thu Apr 3 12:25:45 2014 -0500 Added #include "arm_neon.h" to ARM gemm ukernel. Details: - Inserted #include "arm_neon.h" into gemm ukernel source file for arm/neon. Thanks to Jean-Michel Hautbois for suggesting this fix. commit 2041c264517b6c590fd4f7e8253e6911b622d1c3 Author: Tyler Smith Date: Thu Apr 3 10:30:03 2014 -0500 Added barriers needed prior to doing scalar reset for rank-k updates. commit 47a90e69dfde3f4f8fdf90654248a6b499fbadbc Author: Field G. Van Zee Date: Tue Apr 1 14:34:31 2014 -0500 Attempted to fix uninitialized variable warnings. Details: - Added initialization statements to various macros used in level 1m and 1m-like operations. I wasn't able to reproduce the reported behavior, so hopefully this takes care of it. Thanks to Jeff Hammond for the report. commit d27b4f690c14b1f836f8c7a3c0e91e09d852f02e Author: Field G. Van Zee Date: Tue Apr 1 12:57:24 2014 -0500 Use generic paths for toolchain in POWER7. Details: - Fixed issue #4. Thanks to Jeff Hammond for contributing changes. commit 1584ae1c83c3a8c1af76acb46404747507650f19 Author: Tyler Smith Date: Fri Mar 28 15:15:48 2014 -0500 Fixed race condition involving scalar reset commit 459dde4acc09e49380da58fb7b246db488884ad9 Author: Tyler Smith Date: Thu Mar 27 17:06:45 2014 -0500 Made barrier after packing implicit. This also fixed a bug where barriers in the blocked variants were inserted after the inner packing routines, but not the outer packing routines. This allowed, for instance, the block of B to not be finished being packed before computation to occur. commit 9f78ec6e7e95fcad89a167b27cad7e2d74b6d122 Author: Tyler Smith Date: Thu Mar 27 14:18:46 2014 -0500 Some fixes for the internal functions, was innappropriately only having thread chief do some things. commit a6fd48345424e097f71652be013aa897e098b41e Author: Tyler Michael Smith Date: Wed Mar 26 17:19:46 2014 +0000 Added test drivers for level 3 BLAS that run tests in parallel using MPI commit 73b3db594864be0f9be9a0eb29bf961fa9c95f29 Author: Tyler Michael Smith Date: Wed Mar 26 15:39:05 2014 +0000 Some fixes for the bgq configuration commit f0824a04fc75e231c3a3d7757fa4e7294173282f Author: Tyler Smith Date: Mon Mar 24 15:21:42 2014 -0500 Initial commit to enable threading in TRSM, Also enabled weighted partitioning for herk, trmm Fixed bug where multiple threads would try to modify the same state in the internal level 3 functions Correctly computed a_next and b_next for gemm, herk macrokernels a_next and b_next point to the current micropanels in trmm commit 23d9eab354fbc88165889832955e126772bf8488 Merge: 5d5dc2ee fd3e32a5 Author: Tyler Smith Date: Thu Mar 20 16:54:35 2014 -0500 Merge https://github.com/flame/blis commit 5d5dc2eedef2f7c90d61371a1b457be5c06cf583 Author: Tyler Smith Date: Thu Mar 20 16:43:36 2014 -0500 Parallelized trmm and trmm3 Also fixed bugs in packm commit fd3e32a5f419fa412f46afe4dd1c3a26e15f3eb4 Author: Field G. Van Zee Date: Thu Mar 20 13:59:48 2014 -0500 Refined INSERT_GENTFUNC macro usage. Details: - Defined new INSERT_GENTFUNC macros so that the macro always takes exactly the number of arguments needed for the particular operation or variant being defined. Many operations were using INSERT_GENTFUNC macros that expected one auxiliary argument even though none were needed. Those instances have now been updated. Most of these instances were in the level-0 and -1v operations, as well as some operations defined in frame/util. commit 9b0e715f29338a1a1d6445907d2445c35f011121 Author: Field G. Van Zee Date: Wed Mar 19 15:47:54 2014 -0500 Minor simplifications to trmm, trsm macro-kernels. Details: - Simplified some code that would have allowed the diagonal of a trmm or trsm triangular matrix to intersect the short end of a micro-panel. This is disallowed via higher-level constraints on cache blocksizes, so this code was never needed and only served to obfuscate. - Updated some comments in trmm, trsm macro-kernels. commit a3902750b9ab4923433f7e353f3669c3c419f8e4 Author: Field G. Van Zee Date: Wed Mar 19 12:35:17 2014 -0500 Reorganized norm operations. Details: - Completely reoganized norm operations: - Renames: - fnormsc, fnormv, fnormm -> normfsc, normfv, normfm (2-norm) - absumv -> norm1v (vector 1-norm) - New operations: - norm1m (matrix 1-norm) - normiv, normim (infinity-norm) - amaxv (BLAS-like absolute maximum value index) - asumv (BLAS-like absolute sum) - Deprecated absumm, as it did not correspond to any actual norm. (However, an inlined version now exists in the testsuite module for randm.) commit c0140cb752f27e99742f85d23be2181c00a1335e Author: Tyler Smith Date: Wed Mar 19 11:21:16 2014 -0500 Fixed packm variants 3 and 4 where every thread was trying to manipulate the same state Now just performed by the master thread. commit fb42983bd9943711baa7d1c6496de1215bb816ef Author: Tyler Smith Date: Tue Mar 18 16:37:28 2014 -0500 Fixed a barrier bug and a thread decorator bug commit aa2405f8b23d0f8d2ec04790882f2176ef2e8fd8 Author: Tyler Smith Date: Tue Mar 18 15:23:09 2014 -0500 Fixing function pointer issues with thread decorator commit ec8b88f93533942d3711191873310e7ff281bda6 Author: Tyler Smith Date: Tue Mar 18 14:35:37 2014 -0500 Enabled threading for packm blocked variants 3 and 4 commit 0ac534cdf657bbf04601abfe719ba2887aab5da7 Author: Tyler Smith Date: Tue Mar 18 13:26:27 2014 -0500 Added decorator for calling parallelized intermal functions Will allow for easy support for different threading models commit 5296f58975f7d351f88909cc80b6d0cffd73def7 Author: Tyler Smith Date: Mon Mar 17 17:15:35 2014 -0500 Fixing some bugs with herk parallelization commit c51d0110831eb89361b4720bf7ed75edbd26ebce Author: Tyler Smith Date: Mon Mar 17 15:00:47 2014 -0500 Initial multithreading support for HERK commit c720b141568d1f289146bf34ded08001f2c0dfbb Author: Tyler Smith Date: Mon Mar 17 11:39:32 2014 -0500 Switched to using environment variables to control threading. The environment variables all follow the format BLIS_X_NT, where X is the index of the loop as described in our paper Anatomy of High Performance Many-Threaded Matrix Multiplication. These indices are IR, JR, IC, KC, and JC. Also enabled parallelism for hemm and symm, but these are currently untested. commit 92233cf64274b27b2217c5cfffe75443ff6137a4 Author: Tyler Smith Date: Tue Mar 11 14:16:08 2014 -0500 Some fixes to gemm thread info tree creation, Changed microkernel tests to use the new BLIS_PACKM_SINGLE_THREADED instead of BLIS_SINGLE_THREADED commit 020f80c30289d8bcaa688bf600b01fae9b23b54f Author: Tyler Smith Date: Tue Mar 11 12:08:17 2014 -0500 Added files specific to threading for gemm and packm operations commit 8d8f4352a41926bc923e47be836365b6b726aff2 Author: Tyler Smith Date: Mon Mar 10 15:47:28 2014 -0500 Added single threaded thread info data structures specifically for gemm and packm commit 0e8677761175189583ca7d855e24b2bbdd2dada8 Merge: 2e727a02 b3bff631 Author: Tyler Smith Date: Mon Mar 10 15:16:21 2014 -0500 Merge branch 'master' of https://github.com/tlrmchlsmth/blis commit 2e727a025a8f796d2b6bd14f489d0ee72e7d1fc7 Author: Tyler Smith Date: Mon Mar 10 15:14:33 2014 -0500 Modifying the thread info data structures This change makes each operation have its own thread info type, allowing more fine control of threading in operations that have different types of suboperations commit a770590cf21a459f04bf941c58ee2afd272cc441 Author: Field G. Van Zee Date: Mon Mar 3 14:31:44 2014 -0600 Minor fixes to sumsqv, abmaxv. Details: - Minor update to bli_sumsqv_unb_var1() to bring it up-to-date with LAPACK 3.5.0's zlassq.f, which, starting with 3.4.2, returns NaN when the vector (or matrix) contains a NaN. - Minor change to bli_abmaxv_unb_var1() to more closely mimic the behavior of netlib BLAS's izamax(). There, a "less than or equal to" operator is used in the search instead of "less than", which would change the element index returned if there were multiple maximum values. - Added macro function definitions for bli_isinf() and bli_isnan(), which are currently implemented in terms of isinf() and isnan() from math.h. commit b3bff631eadf98b15cb422fb4a8e2f855c23e8a7 Merge: 2c158fb8 e8757b03 Author: Tyler Smith Date: Thu Feb 27 16:53:24 2014 -0600 Merge https://github.com/flame/blis commit 2c158fb885c27f7b599dc1e85b57edd684f19223 Merge: e4738c48 c2b2ab62 Author: Tyler Smith Date: Thu Feb 27 16:46:23 2014 -0600 Merge https://github.com/flame/blis Conflicts: frame/1m/packm/bli_packm_blk_var1.c commit e8757b03a74f9891632242e9a90efb32150826f5 Author: Field G. Van Zee Date: Thu Feb 27 16:40:07 2014 -0600 Use "%ld" as int format specifier in fprintm. Details: - Changed "%d" to "%ld" when printing integers via bli_fprintm(). - Meant to include this in previous commit. commit c663ce3b5170fee7dfb5b528b650d70c8e932cac Author: Field G. Van Zee Date: Thu Feb 27 16:32:57 2014 -0600 Fixed various bugs when C99 complex is enabled. Details: - Fixed various bugs in packm_*_cxk(), the 4m/3m micro-kernels, and elsewhere in the framework that were not yet set up to work properly when BLIS_ENABLE_C99_COMPLEX is defined in bli_config.h - Extensive changes to f2c-derived files in frame/compat/f2c to allow C99 complex storage. Most of these changes center around accessing real and imaginary components via bli_?real()/bli_?imag() accessor macros, and setting of values via bli_?sets() assignment macros. (Thanks to Vladimir Sukarev for pointing out that _ENABLE_C99_COMPLEX was broken.) commit e4738c48e00b89391d9baa1fd0aa62d1ea2f95e6 Author: Tyler Smith Date: Thu Feb 27 16:29:46 2014 -0600 Added support for parallelism in gemm micro-kernel commit bfe214b633765ed40b57b330fbb84c332663aa40 Author: Tyler Smith Date: Thu Feb 27 15:53:10 2014 -0600 Fixed bug with parallel packing, and bug with allocating an array of thread infos In packm variant 1, the variable p_begin was incremented each iteration, causing a dependency. This dependeny was removed, allowing each iteration to be executed in parallel. Somewhere in bli_threading.c, I was allocating an array of pointers instead of an array of structs. commit 6193d9ceea552e67170dba45abde04c64271c705 Author: Tyler Smith Date: Thu Feb 27 14:09:19 2014 -0600 Fixed bug in thread trees commit ac5a2de1d17ffd460b00fee9757898525a09abae Merge: 01b125e8 bd3c7ecf Author: Tyler Smith Date: Thu Feb 27 11:59:33 2014 -0600 Merge branch 'master' of https://github.com/tlrmchlsmth/blis commit 01b125e815f19410e8e0611d088b84570e499e93 Author: Tyler Smith Date: Thu Feb 27 11:55:45 2014 -0600 First pass at adding parallelism to BLIS. Added a multithreading infrastructure that should be independent of multithreading implementation in the future. Currently, gemm blocked variants 1f and 2f, and packm variant blocked variant 1 is parallelized. commit c2b2ab62707e4174892aff3ce65f36f54878fae5 Author: Field G. Van Zee Date: Wed Feb 26 12:46:45 2014 -0600 Deprecated panel stride alignment in bli_config.h. Details: - Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE from bli_config.h of all configurations. It was already going unused in packm_init() since the recent 4m/3m commit. This setting was rarely, if ever, useful, and its existence only posed a potential risk for 4m/3m-based implementations. - Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE usage from mem_pool_macro_defs.h. - Updated comments regarding CONTIG_STRIDE_ALIGN_SIZE in template micro-kernels. commit f18aee83a5ac1b14808686fc3c5a3c846a1d99b9 Author: Field G. Van Zee Date: Tue Feb 25 17:58:42 2014 -0600 CHANGELOG update (for 0.1.1). commit fde5f1fdece19881f50b142e8611b772a647e6d2 Author: Field G. Van Zee Date: Tue Feb 25 13:34:56 2014 -0600 Added extensive support for configuration defaults. Details: - Standard names for reference kernels (levels-1v, -1f and 3) are now macro constants. Examples: BLIS_SAXPYV_KERNEL_REF BLIS_DDOTXF_KERNEL_REF BLIS_ZGEMM_UKERNEL_REF - Developers no longer have to name all datatype instances of a kernel with a common base name; [sdcz] datatype flavors of each kernel or micro-kernel (level-1v, -1f, or 3) may now be named independently. This means you can now, if you wish, encode the datatype-specific register blocksizes in the name of the micro-kernel functions. - Any datatype instances of any kernel (1v, 1f, or 3) that is left undefined in bli_kernel.h will default to the corresponding reference implementation. For example, if BLIS_DGEMM_UKERNEL is left undefined, it will be defined to be BLIS_DGEMM_UKERNEL_REF. - Developers no longer need to name level-1v/-1f kernels with multiple datatype chars to match the number of types the kernel WOULD take in a mixed type environment, as in bli_dddaxpyv_opt(). Now, one char is sufficient, as in bli_daxpyv_opt(). - There is no longer a need to define an obj_t wrapper to go along with your level-1v/-1f kernels. The framework now prvides a _kernel() function which serves as the obj_t wrapper for whatever kernels are specified (or defaulted to) via bli_kernel.h - Developers no longer need to prototype their kernels, and thus no longer need to include any prototyping headers from within bli_kernel.h. The framework now generates kernel prototypes, with the proper type signature, based on the kernel names defined (or defaulted to) via bli_kernel.h. - If the complex datatype x (of [cz]) implementation of the gemm micro- kernel is left undefined by bli_kernel.h, but its same-precision real domain equivalent IS defined, BLIS will use a 4m-based implementation for the datatype x implementations of all level-3 operations, using only the real gemm micro-kernel. commit 15b51e990f1d21333b5f7af97c211756247336e5 Merge: 6363a9f6 fc04b5eb Author: Field G. Van Zee Date: Fri Feb 21 09:04:32 2014 -0600 Merge branch 'master' of github.com:fgvanzee/blis commit fc04b5eb69868c341ce03f5ef1f02de4b8c121b0 Merge: b29e1c2b d1813c9d Author: Field G. Van Zee Date: Fri Feb 21 09:04:13 2014 -0600 Merge pull request #3 from figual/master New ARM armv7a kernels and Assembly file consideration in Makefile commit d1813c9dee34410833db5061e6588ec1a6c9ecd4 Author: Francisco Igual Date: Fri Feb 21 15:14:31 2014 +0100 Added new armv7a micro-kernels and configuration files from Werner Saar. commit 0cd098c03a000ed9426a7e9135190696da8cadbc Author: Francisco Igual Date: Fri Feb 21 15:12:30 2014 +0100 o Modified Makefile to consider .S assembly microkernels. commit 6363a9f658257fe3d814a3dce5308f807adb54a2 Author: Field G. Van Zee Date: Wed Feb 19 17:00:52 2014 -0600 Added level-3 support for complex via 4m-/3m. Details: - Added the ability to induce complex domain level-3 operations via new virtual complex micro-kernels which are implemented via only real domain micro-kernels. Two new implementations are provided: 4m and 3m. 4m implements complex matrix multiplication in terms of four real matrix multiplications, where as 3m uses only three and thus is capable of even higher (than peak) performance. However, the 3m method has somewhat weaker numerical properties, making it less desirable in general. - Further refined packing routines, which were recently revamped, and added packing functionality for 4m and 3m. - Some modifications to trmm and trsm macro-kernels to facilitate indexing into micro-panels which were packed for 4m/3m virtual kernels. - Added 4m and 3m interfaces for each level-3 operation. - Various other minor changes to facilitate 4m/3m methods. commit b29e1c2b278c177e104c84ba462820ee8296df6c Merge: ee60377e bd3c7ecf Author: Field G. Van Zee Date: Fri Feb 14 14:11:54 2014 -0600 Merge pull request #2 from tlrmchlsmth/master Fixes and improvements to xeon phi implementation. commit bd3c7ecfb54a9b9851c7d364f41c21e4cff52f6f Author: Tyler Smith Date: Fri Feb 14 14:05:57 2014 -0600 Removing changes to input.general and input.operations commit ce066863683cb4e910270cf8ab8e138b01ff3358 Author: Tyler Smith Date: Fri Feb 14 13:40:24 2014 -0600 Fixed more Xeon Phi bugs, especially with scattered update commit 31134b5c7076423aee1b4f494e925f27171d97e6 Author: Tyler Smith Date: Fri Feb 14 11:19:44 2014 -0600 Some fixes, changes, and improvements to the microkernel to the Xeon Phi commit ee60377e467862b9d8a7205c45dce5cf66c78c46 Author: Field G. Van Zee Date: Thu Feb 13 14:03:31 2014 -0600 Shifted some fields in info_t. Details: - Shifted the pack order, pack buffer type, and structure type fields to make room for an extra bit in the pack type/status field. commit bd3ab1ad4cf42f8bc30ab262acf8eccb49bb1a08 Author: Field G. Van Zee Date: Thu Feb 13 09:29:55 2014 -0600 Minor fixes to trsm consistent with prev on trmm. Details: - Removed use of bli_min() and bli_max() that were only being used to try to support situations where the diagonal would intersect the short end of some micro-panels, which is situation that is disallowed at a higher level by various constraints on the register and cache blocksize. This only affected trsm_ll and trsm_lu. - Use panel stride as passed into the macro-kernel rather than compute it via k and PACKMR/PACKNR. This affects all macro-kernels of trsm. commit 6260b0b5f8bd248f3f66e5a1c6854bdbd9d02ad0 Author: Field G. Van Zee Date: Thu Feb 13 09:19:56 2014 -0600 Fixed obscure bug in trmm_ll, trmm_lu. Details: - Fixed an obscure bug in left-hand trmm that would only manifest when non-zero register blocksize extensions (PACKMR > MR or PACKNR > NR) are used. - Removed use of bli_min() and bli_max() that were only being used to try to support situations where the diagonal would intersect the short end of some micro-panels, which is situation that is disallowed at a higher level by various constraints on the register and cache blocksize. This only affected trmm_ll and trmm_lu. - Use panel stride as passed into the macro-kernel rather than compute it via k and PACKMR/PACKNR. This affects all macro-kernels of trmm. commit 16915c1c1e55c660bf82141cdadf7c0860d5b464 Author: Field G. Van Zee Date: Tue Feb 11 10:54:19 2014 -0600 Fixed an obscure bug in packm_cxk(). Details: - Fixed a bug in packm_cxk() whereby the packm ukernel was being chosen from ldp, which is always equal to PACKMR or PACKNR. The problem with this is that the pack ukernels were implicitly assuming that the panel dimension of the panel being packed was equal to ldp, which is not the case when the register blocksizes extensions are non-zero (ie: when PACKMR > MR or PACKNR > NR, whichever is applicable). This problem has been fixed by passing ldp into the pack ukernels, which now walk through the packed micro-panel region by incrementing by this value, rather than incrementing by the inherent panel dimension value assumed by each packm ukernel (e.g. 4 in the case of packm_ref_4xk). - Also fixed a very minor edge case inefficiency whereby pack ukernels smaller than the default were not being used in edge cases, and instead those situations were being handled by scal2m. This is related to the issue above, because the pack ukernel itself was being chosen based on ldp instead of the panel dimension. commit b7da57b282c5a5e2208946e60309d2352f55351d Author: Field G. Van Zee Date: Tue Feb 11 10:28:23 2014 -0600 Updated calls to packm_blk_var2() in testsuite. Details: - In ukernel testsuite modules, replaced calls to packm_blk_var2() with _var1(). Meant to include this in previous commit. commit c255a293e25b2223c88e8800267cd06ad2a90041 Author: Field G. Van Zee Date: Mon Feb 10 14:31:24 2014 -0600 Consolidated packm_blk_var2 and var3. Details: - Consolidated the functionality previously supported by packm_blk_var2() and packm_blk_var3() into a new variant, packm_blk_var1(). - Updates to packm_gen_cxk(), packm_herm_cxk.c(), and packm_tri_cxk() to accommodate above changes. - Removed packm_blk_var3() and retired packm_blk_var2() to frame/1m/packm/old. - Updated all level-3 _cntl_init() functions so that the new, more versatile packm_blk_var1 is used for all level-3 matrix packing. commit 32d8f264ae7b28155f5d7b21dcc5ecb78da2e0ab Author: Field G. Van Zee Date: Sun Feb 9 10:07:37 2014 -0600 Refactored packm variants. Details: - Revised packm_blk_var2() and _var3() by encapsulating the general, hermitian/symmetric, and triangular panel-packing subproblems into separate functions: packm_gen_cxk(), packm_herm_cxk(), and packm_tri_cxk(), respectively. Also, homogenized the packm code as well as the new specialized packm_*_cxk() code to further improve readability. commit 6c8067028707947fcdf4f856a272e15bb9ed91e3 Author: Field G. Van Zee Date: Fri Feb 7 11:27:15 2014 -0600 Renamed enumerated type in testsuite and modules. Details: - Renamed the test suite's "mt_impl_t" enumerated type to "iface_t", and renamed all corresponding "impl" variables to "iface". commit 6c12598b1bc567f0b08f58aebdc753a1c1390378 Author: Field G. Van Zee Date: Thu Feb 6 18:26:35 2014 -0600 Employ simpler INSERT_ macro for ref ukernels. Details: - Defined a new macro, INSERT_GENTFUNC_BASIC0, which takes only one argument--the base name of the function--and employed this macro in the reference micro-kernel files instead of the _BASIC macro, which takes one auxiliary argument. That argument was not being used and probably just acted to unnecessarily obfuscate. commit 32cae66326b68706d0e695cfd60c9ca5bc32c534 Author: Field G. Van Zee Date: Thu Feb 6 18:06:42 2014 -0600 Fixed some instances of sloppy 'restrict' usage. Details: - Fixed some technical incorrectness with some usage of the 'restrict' keyword in the reference trsm micro-kernels. - Tweak to testsuite/Makefile that causes rebuild if libblis was touched. commit 7aceef7683e2a2aff3c7ec2a73508036af2e19e2 Author: Field G. Van Zee Date: Thu Feb 6 17:31:19 2014 -0600 Updated comments in macro-kernels. Details: - Updated (and fixed some errors in) the "Assumptions/assertions" comment section of macro-kernels. - Changed register blocksizes of reference configuration to MR = 8 and NR = 4. It's always good for MR != NR in the reference configuration since it may help uncover bugs related to non-square micro-kernels. commit 8fd292aa78950bcdf556605718f09d13f9575abc Author: Field G. Van Zee Date: Thu Feb 6 14:32:21 2014 -0600 Pass panel dimensions into macro-kernels. Details: - Modified the interfaces to the datatype-specific macro-kernels so that: - pd_a and pd_b are passed in (which contain the panel dimensions of packed panels of a and b). - rs_a and cs_b are no longer passed in (they were guaranteed to be 1). - Modified implementations of datatype-specific macro-kernels so pd_a, pd_b, cs_a, and rs_b are used instead of cpp macros for MR, NR, PACKMR, and PACKNR, respectively. - Declare temporary c matrices (ct) as being maxmr-by-maxnr, which for now is equivalent to being mr-by-nr. maxmr and maxnr are declared in a new header file bli_kernel_post_macro_defs.h. commit 3404e6657eabb017cd1580a2f1dd8e6fb13df923 Author: Field G. Van Zee Date: Wed Feb 5 11:19:10 2014 -0600 Deprecated incremental blocksize macro const defs. Details: - Removed macro constant definitions related to incremental blocksizes from all configurations' bli_kernel.h files. This change is minor and is mostly a cleanup related to a previous commit. commit 1e9afd39a63e0a58167d4439c1a0a880a4a35657 Author: Field G. Van Zee Date: Tue Feb 4 20:15:19 2014 -0600 Comment updates (removed vestiges of "bd"). commit 5cf58f7c2d5bc0d2d94d9576f7158d8f133b7aac Author: Field G. Van Zee Date: Tue Feb 4 09:15:19 2014 -0600 Added early returns for "object is zeros" case. Details: - Added some logic to packm_init(), pack_int() and gemm_int() so that (a) objects marked as BLIS_ZEROS are not packed, and (b) those objects are not computed with. This functionality is not currently needed by any existing implementations, but may be used in the future. commit 6bbd4be769a9b344a55abe5ddaca1a99fd29f7b4 Author: Field G. Van Zee Date: Mon Feb 3 13:15:25 2014 -0600 Added 'f' on some gemm and trmm blocked variants. Details: - Added 'f' to some block variant files/functions to be consistent with other file/functions' naming convention. Here, the f indicates partitioning in the "forward" direction. commit eb13cb2c6b182df5e2a9b88c76f50e2cee25b9e0 Author: Field G. Van Zee Date: Mon Feb 3 11:07:01 2014 -0600 Removed redundant non-gemm blksz_t creation. Details: - Removed code that creates duplicate blksz_t objects for herk, trmm, and trsm. Instead, the gemm blksz_t objects are accessed via extern and used directly. This reduces the amount of code associated with each of the three _cntl_init() and _cntl_finalize() function. commit 0a023a7d9e58e53b8c204a5f49aa8ca9afeba938 Author: Field G. Van Zee Date: Wed Jan 29 14:02:08 2014 -0600 Introduced new level-3 front-end layer. Details: - Added new _front() functions for each level-3 operation. This is done so that the choosing of the control tree (and *only* the choosing of the control tree) happens in what was previously the "front end" (e.g. bli_gemm()). That control tree is then passed into the _front() function, which then performs up-front tasks such as parameter checking. commit 251c5d112196d37b183e554bc9d406104aed65fb Author: Field G. Van Zee Date: Tue Jan 28 19:40:29 2014 -0600 Removed redundant hemm, her2k control trees. Details: - Removed code that generated a control tree specifically for hemm and symm. Instead, the gemm control tree is now configured so that it works for gemm, hemm, or symm. - Retired most her2k code, as it was not being used. (Currently, her2k is implemented as two invocations of herk.) I couldn't think of many situations where her2k variants were needed. - Removed some older her2k code. commit 5a36e5bf2f59d1e85d6dbce32a07d604c5e82d11 Author: Field G. Van Zee Date: Mon Jan 27 11:13:00 2014 -0600 Embed func_t microkernel objects in control trees. Details: - Modified all control tree node definitions to include a new field of type func_t*, which is similar to a blksz_t except that it contains one function pointer (each typed simply as void*) for each datatype. We use the func_t* to embed pointers to the micro-kernels to use for the leaf-level nodes of each control tree. This change is a natural extension of control trees and will allow more flexibility in the future. - Modified all macro-kernel wrappers to obtain the micro-kernel pointers from the incomming (previously ignored) control tree node and then pass the queried pointer into the datatype-specific macro-kernel code, which then casts the pointer to the appropriate type (new typedefs residing in bli_kernel_type_defs.h) and then uses the pointer to call the micro- kernel. Thus, the micro-kernel function is no longer "hard-coded" (that is, determined when the datatype-specific macro-kernel functions are instantiated by the C preprocessor). - Added macros to bli_kernel_macro_defs.h that build datatype-specific base names if they do not exist already, and then uses those to build datatype-specific micro-kernel function names. This will allow developers extra flexibility if they wanted to, for example, name each of their datatype-specific micro-kernels differently (e.g. double real might be named bli_dgemm_opt_4x4() while double complex might be named bli_zgemm_opt_2x2()). - Inserted appropriate code into _cntl_init() functions that allocates and initializes a func_t object for the corresponding micro-kernels. The gemm ukernel func_t object is created once, in bli_gemm_cntl_init(), and then reused via extern wherever possible. commit 6cbd6f1c7f1915180aa28939833afde48665c5ae Author: Field G. Van Zee Date: Fri Jan 24 10:38:29 2014 -0600 Removed commented mixed domain macro-kernel code. Details: - Removed commented-out code from macro-kernels that was supposed to facilitate implementing mixed domain (complex times real) matrix multiplication. This functionality is still (probably possible), but I'm getting tired of looking at the code every time I edit a macro-kernel. Plus, there are probably ways of doing it at a higher level, via control trees. commit 29778be1119f1a884330d7f8dc424a2df4101d58 Author: Field G. Van Zee Date: Wed Jan 22 16:03:11 2014 -0600 Removed b_aux field from cntl nodes. Details: - Removed b_aux field from all control tree node definitions. This field was being used in certain optimizations (incremental blocking) that were not actually being employed within BLIS, and are probably not employed by others. - Updated all _cntl_obj_create() function definitions and invocations according to above change. - Retired bli_gemm_blk_var4.c, which was one such function that employed incremental blocking, but which was never called by BLIS itself. commit 06ac727a42ec9e832c7832745036702014638f99 Author: Field G. Van Zee Date: Wed Jan 15 16:44:52 2014 -0600 Updated some comments in level-3 front ends. commit d628bf1da1560f1f5126a1ddfed8714f0a4b8da3 Author: Field G. Van Zee Date: Wed Jan 15 11:40:12 2014 -0600 Consolidated pack_t enums; retired VECTOR value. Details: - Changed the pack_t enumerations so that BLIS_PACKED_VECTOR no longer has its own value, and instead simply aliases to BLIS_PACKED_UNSPEC. This makes room in the three pack_t bits of the info field of obj_t so that two values are now unused, and may be used for other future purposes. - Updated sloppy terminology usage in comments in level-2 front-ends. (Replaced "is contiguous" with more accurate "has unit stride".) commit ddc8c1c379b4787be5954802906593d7ea144452 Author: Field G. Van Zee Date: Mon Jan 13 14:55:43 2014 -0600 Suppress warning in Makefile (UNINSTALL_LIBS). Details: - Redirect errors to /dev/null when using 'find' to locate libraries that would be uninstalled upon executing "make uninstall-old". Before, if the Makefile was read before $(INSTALL_PREFIX)/lib existed, a "No such file or directory" message was emitted. This message was harmless, but is now suppressed in this situation. commit f8f67d7251bffc05020e20527c100c8115fd5e55 Author: Field G. Van Zee Date: Fri Jan 10 09:06:11 2014 -0600 Typecast bli_getopt() return value in testsuite. Details: - In the test suite driver, inserted an explicit typecast of the return value of bli_getopt() prior parsing. The lack of typecast caused a problem on at least one system whereby a return value of -1 was interpreted as garbage character. Thanks to Francisco Igual for finding and submitting this fix. commit e7f154fe2ed3e10e2323cefe5d25c2c23ac902c4 Author: Field G. Van Zee Date: Fri Jan 10 08:48:07 2014 -0600 Applied edge case fix to arm/neon microkernel. Details: - Applied an edge case bugfix, courtesy of Francisco Igual, to the current double precision real gemm microkernel in kernels/arm/neon/3. commit 89c76a8a51d070d263c13bfa5ace65769509f2b4 Author: Field G. Van Zee Date: Thu Jan 9 12:08:37 2014 -0600 Allow building outside source distribution. Details: - Modified build system (mostly configure and top-level Makefile) so that a user can build a BLIS library outside of the top-level directory of the source distribution. - Added "test" target to Makefile so that the user can run "make test", which will compile, link, and run the testsuite binary. This works even if the build directory is externally located, thanks to the test suite binary's new -g and -o command-line options. Also, when creating the test suite via the top-level Makefile, the linking is against the local archive, in lib/, rather than at /lib. - Modified testsuite/Makefile so that it links against the library built locally, in ../lib/. - Added "-lm" to LDFLAGS of most configurations' make_defs.mk. - Various other cleanups to build system. commit 12fa82ec12cc340ab28552997d9d50f7c98691f8 Author: Field G. Van Zee Date: Wed Jan 8 16:09:26 2014 -0600 Implemented bli_getopt(). Details: - Added bli_getopt.c and .h files to frame/base. These files implement a custom version of getopt(), which may be used to parse command line options passed into a program via argc/argv. I am implementing this function myself, as opposed to using the version available via unistd.h, for portability reasons, as the only requirements are string.h (which is available via the standard C library). - Modified test suite to allow the user to specify the file name (and/or path) to the parameters and operations input files: -g may be used to specify the general input file and -o to specify the operations input file). If -g or -o or both are not given, default filenames are assumed (as well as their existence in the current directory). commit cafb58e86ea5cfb21b9eedc57ca8ebbf24252098 Author: Field G. Van Zee Date: Mon Jan 6 13:28:36 2014 -0600 Updated template micro-kernels to use auxinfo_t. Details: - Updated template micro-kernel implementations (located in config/template/kernels), to adhere to the new auxinfo_t interface. Meant to include this change in a0331fb1. - Changed template configuration to use 64-bit integers (for both BLIS and the BLAS compatibility layer). commit 9ab126b499c3805045020cb89a8a5848e28d3bf5 Author: Field G. Van Zee Date: Mon Jan 6 12:13:26 2014 -0600 Removed error checks in netlib->BLIS param mapping Details: - Disabled error checking in netlib-to-BLIS parameter mapping functions. If the char value input to these functions was not one of the defined values, bli_check_error_code() with the appropriate error code value would be called, resulting in an abort(). This was unnecessary and redundant since these routines are currently only used within the BLAS compatibility layer, and they are only called AFTER parameter checking has already been performed on the original BLAS char values. If the application tried to override xerbla() to prevent an abort() from being called, this error checking would still get in the way. Thus, instead of reporting the error situation to the framework (ie: calling abort()), an arbitrary BLIS parameter value is now chosen and the function returns normally. Thanks to Jeff Hammond for finding and reporting this issue. commit 2cb13600f9f9601c60e7f96f4ca159d169ade9cb Author: Field G. Van Zee Date: Fri Jan 3 12:29:13 2014 -0600 Updated year in copyright headers to 2014. commit 290fa54e0083c9c837188b8321b13b1b282e7b0c Author: Field G. Van Zee Date: Fri Dec 20 14:10:26 2013 -0600 Store variable panel strides in trmm/trsm auxinfo. Details: - Changed the value being stored into the auxinfo_t structure in trmm and trsm macro-kernels. Whereas before we stored whatever value was provided to the macro-kernel implementation via ps_a/ps_b, now we store the stride that will advance to the next variable-length micro-panel of the triangular matrix A (left) or B (right). - Whitespace changes to the files affected above. commit e3a6c7e77667fd749248df3f75f880266c3136ec Author: Field G. Van Zee Date: Thu Dec 19 16:29:31 2013 -0600 Macroized conditionals for a2/b2 in macro-kernels. Details: - Replaced conditional expressions in macro-kernels related to computing the addresses a2 and b2 (a_next and b_next) with a preprocessor macro invocation, bli_is_last_iter(), that tests the same condition. - Updated gemm_ukr module to use auxinfo_t argument. - Whitespace changes in test suite ukr modules. commit a0331fb10a50393e31d16339053b75b944132da1 Author: Field G. Van Zee Date: Thu Dec 19 14:50:11 2013 -0600 Introduced auxinfo_t argument to micro-kernels. Details: - Removed a_next and b_next arguments to micro-kernels and replaced them with a pointer to a new datatype, auxinfo_t, which is simply a struct that holds a_next and b_next. The struct may hold other auxiliary information that may be useful to a micro-kernel, such as micro-panel stride. Micro-kernels may access struct fields via accessor macros defined in bli_auxinfo_macro_defs.h. - Updated all instances of micro-kernel definitions, micro-kernel calls, as well as macro-kernels (for declaring and initializing the structs) according to above change. commit 392428dea4001fe4384efe29f6cde32f8abeeb35 Author: Field G. Van Zee Date: Thu Dec 12 19:01:47 2013 -0600 Added "ri" scalar macros. Details: - Added set of basic scalar macros that take arguments' real and imaginary components separately, named like the previous set except with the "ris" (instead of "s") suffix. - Redefined the previous set of scalar macros (those that take arguments "whole") in terms of the new "ri" set. - Renamed setris and getris macros to sets and gets. - Renamed setimag0 macros to seti0s. - Use bli_?1 macro instead of a local constant in bla_trmv.c, bla_trsv.c. commit f60c8adc2f61eaba06b892f4e73000159de93056 Author: Field G. Van Zee Date: Tue Dec 10 14:39:56 2013 -0600 Minor updates to dunnington configuration. Details: - Added commented alternatives to dunnington configuration's bli_kernel.h. - Minor reformatting of optimization flag variables in make_defs.mk. commit 4ef20150492db254b5baf2368add62e19b0ac11b Author: Field G. Van Zee Date: Mon Dec 9 18:53:03 2013 -0600 Tweaks to dunnington configuration (x86_64/core2). Details: - Updated BLIS_DEFAULT_KC_D from 256 to 384. - Enabled cache blocksize extension of up to 25% for MC and KC (for double-precision real). commit 5ad2ce7bf5ba3ea955e6d517bfd270e02820263b Author: Field G. Van Zee Date: Mon Dec 9 18:30:49 2013 -0600 Minor x86_64 (core2) kernel fixes. Details: - Fixed copy-and-paste bug whereby [scz]gemmtrsm_u_opt_d4x4 kernels for x86_64/core2 were calling the wrong reference code (l instead of u). - Fixed some unused variables in x86_64/core2 dotaxpyv and dotxaxpyf kernels. - Minor typecasting fix in testsuite/src/test_libblis.c. - Makefile updates. commit d289f5d3a9c0e1a68a17c1c32b736e282a289c4c Author: Field G. Van Zee Date: Thu Dec 5 10:56:13 2013 -0600 Whitespace changes to level-2 blocked variants. Details: - Joined some lines in level-2 blocked variants to match formatting used in level-3 blocked variants. - Streamlined implementation of bli_obj_equals() in bli_query.c. commit b444489f100d218bc8ef29b01ff8489c358559f9 Author: Field G. Van Zee Date: Tue Dec 3 16:08:30 2013 -0600 Added new "attached" scalar representation. Details: - Added infrastructure to support a new scalar representation, whereby every object contains an internal scalar that defaults to 1.0. This facilitates passing scalars around without having to house them in separate objects. These "attached" scalars are stored in the internal atom_t field of the obj_t struct, and are always stored to be the same datatype as the object to which they are attached. Level-3 variants no longer take scalar arguments, however, level-3 internal back-ends stll do; this is so that the calling function can perform subproblems such as C := C - alpha * A * B on-the-fly without needing to change either of the scalars attached to A or B. - Removed scalar argument from packm_int(). - Observe and apply attached scalars in scalm_int(), and removed scalar from interface of scalm_unb_var1(). - Renamed the following functions (and corresponding invocations): bli_obj_init_scalar_copy_of() -> bli_obj_scalar_init_detached_copy_of() bli_obj_init_scalar() -> bli_obj_scalar_init_detached() bli_obj_create_scalar_with_attached_buffer() -> bli_obj_create_1x1_with_attached_buffer() bli_obj_scalar_equals() -> bli_obj_equals() - Defined new functions: bli_obj_scalar_detach() bli_obj_scalar_attach() bli_obj_scalar_apply_scalar() bli_obj_scalar_reset() bli_obj_scalar_has_nonzero_imag() bli_obj_scalar_equals() - Placed all bli_obj_scalar_* functions in a new file, bli_obj_scalar.c. - Renamed the following macros: bli_obj_scalar_buffer() -> bli_obj_buffer_for_1x1() bli_obj_is_scalar() -> bli_obj_is_1x1() - Defined new macros to set and copy internal scalars between objects: bli_obj_set_internal_scalar() bli_obj_copy_internal_scalar() - In level-3 internal back-ends, added conditional blocks where alpha and beta are checked for non-unit-ness. Those values for alpha and beta are applied to the scalars attached to aliases of A/B/C, as appropriate, before being passed into the variant specified by the control tree. - In level-3 blocked variants, pass BLIS_ONE into subproblems instead of alpha and/or beta. - In level-3 macro-kernels, changed how scalars are obtained. Now, scalars attached to A and B are multiplied together to obtain alpha, while beta is obtained directly from C. - In level-3 front-ends, removed old function calls meant to provide future support for mixed domain/precision. These can be added back later once that functionality is given proper treatment. Also, removed the creating of copy-casts of alpha and beta since typecasting of scalars is now implicitly handled in the internal back-ends when alpha and beta are applied to the attached scalars. commit 992de486d6f23e69a623abd15ae77d7881d13871 Merge: 9552e6ee fd4ac636 Author: Field G. Van Zee Date: Mon Dec 2 13:58:46 2013 -0600 Unimplemented kernels now call reference. Details: - Updated arm, bgq, loongson3a, and x86_64 kernels so that unimplemented datatypes call the corresponding reference kernel. Previously, these kernel functions called abort() with a "not yet implemented" error message. commit fd4ac636d9a55cec1476a444bd4e70def219dc8f Author: Field G. Van Zee Date: Mon Dec 2 13:50:36 2013 -0600 Unimplemented kernels now call reference. Details: - Updated micro-kernels for arm, bgq, loongson3a, and x86_64 so that unimplemented kernel functions simply call the corresponding reference implementation. (Previously, these unimplemented functions would abort() with a "not yet implemented" message.) commit 9552e6ee824d4345d5e908e869e071d19829819a Author: Field G. Van Zee Date: Sun Nov 24 11:40:31 2013 -0600 Removed optional scaling from packm control tree. Details: - Removed does_scale field from packm control tree node and bli_packm_cntl_obj_create() interface. Adjusted all invocations of _cntl_obj_create() accordingly. - Redefined/renamted macros that are used in aliasing so that now, bli_obj_alias_to() does a full alias (shallow copy) while bli_obj_alias_for_packing() does a partial alias that preserves the pack_mem-related fields of the aliasing (destination) object. - Removed bli_trmm3_cntl.c, .h after realizing that the trmm control tree will work just fine for bli_trmm3(). - Removed some commented vestiges of the typecasting functionality needed to support heterogeneous datatypes. commit e65c476284db9ef64b23191a21c2584b1083342f Author: Field G. Van Zee Date: Tue Nov 19 10:05:35 2013 -0600 Minor updates to packm_blk_var2.c and _blk_var3.c. Details: - Comment updates to packm_blk_var2.c and packm_blk_var3.c. - In packm_blk_var2(), call setm_unb_var1(), scal2m_unb_var1() directly instead of setm(), scal2m(). commit 9e1d0d4bca48eda54301d8976f203e2544c9df3a Author: Field G. Van Zee Date: Mon Nov 18 18:11:07 2013 -0600 Added trsm_l, trsm_u ukernels for x86_64/core2. Details: - Added standalone trsm_l/trsm_u micro-kernels for x86_64 (core2). These kernels are based on the gemmtrsm_l/gemmtrsm_u micro-kernels that already existed in kernels/x86_64/core2-sse3/3. commit 85e7e02ea3a9190b6fcff5d46b00d41c79cb1242 Merge: 67761e22 70720054 Author: Field G. Van Zee Date: Mon Nov 18 12:02:00 2013 -0600 Merge branch 'master'. Forgot to git-pull. commit 67761e224c92500eecf9c1540cc72bdd2fb27679 Author: Field G. Van Zee Date: Mon Nov 18 11:57:40 2013 -0600 Attempting to fix errors in bgq build. Details: - Removed restrict declaration from b_cast and c_cast from bli_trsm_lu_ker_var2.c and bli_trsm_rl_ker_var2.c. Curiously, they are causing problems for xlc only in those two files and no other macro-kernels. - Fixed (hopefully) kernel function parameter type declarations in kernels/bgq/1f/bli_axpyf_opt_var1.c and kernels/bgq/3/bli_gemm_8x8.c. commit 707200541d344f98cf34c9801954dbb36fbe0447 Author: Field G. Van Zee Date: Mon Nov 18 11:17:31 2013 -0600 Syntax error fix in x86_64/core2 gemmtrsm_u ukr. commit bbe2b84a49e7785d4d0c514cda34adfbe66478b0 Author: Field G. Van Zee Date: Mon Nov 18 11:11:06 2013 -0600 Updated Makefile in test, testsuite. Details: - Updated Makefiles in test and testsuite directories to use the new BLIS header installation directory scheme, which is to compile with -I/include/blis instead of -I/include. commit 9bd7fcfd436625ca2108128086671319362f4d92 Author: Field G. Van Zee Date: Mon Nov 18 10:58:09 2013 -0600 Outer-to-inner 'restrict' fix in macro-kernels. Details: - Fixed sloppy placement of 'restrict' pointer declarations in level-3 macro-kernels. Previously, all restricted pointers were being declared at the outer-most function scope level. While this violates the C99 standard, very few of the compilers used with BLIS so far have seemed to care. The lone exception has been IBM's xlc. Thanks to Tyler Smith for identifying this bug (and suggesting the fix). commit 50549a6a31dd26cf63a013e0ede16b2c7ce835b6 Author: Field G. Van Zee Date: Sun Nov 17 18:31:27 2013 -0600 Changed header install directory to include/blis. Details: - Changed top-level Makefile so that headers are installed to $(INSTALL_PREFIX)/include/blis/. (Header directories are no longer named by version/configuration and then symlinked.) - Added uninstall targets, including uninstall-old to clean out old library archives. - Added GREP makefile definitions to all configurations' make_defs.mk. commit d70733abddfb9a95661897e1e4f3c1f3cfa7cbaa Author: Field G. Van Zee Date: Sat Nov 16 17:34:25 2013 -0600 Added ARM kernels, configurations. Details: - Added kernels for ARM, and configurations for Cortex-A9 and Cortex-A15. Thanks to Francisco Igual for contributing these kernels and configurations. commit d37c2cff62089c86983c2f79762f4b5329037373 Author: Field G. Van Zee Date: Wed Nov 13 10:47:11 2013 -0600 Minor comment and Makefile changes. Details: - Added missing 'check-config' and 'check-make-defs' targets to testsuite/Makefile. - Removed unused 'test' target from top-level Makefile. - Comment changes to testsuite input files. commit 19885f893a17b91ee79bead0620d0f913392d4c5 Author: Field G. Van Zee Date: Mon Nov 11 12:09:21 2013 -0600 Updated some kernel comment headers. Details: - Updated bgq and piledriver comment headers to use BLIS copyright header instead of libflame. commit 1a4d698f42981d74fe5f29b980031e1ee7dc42d5 Author: Field G. Van Zee Date: Mon Nov 11 10:15:40 2013 -0600 CHANGELOG update (for 0.1.0). commit 089048d5895a30221b6b1976c9be93ad6443420d Author: Field G. Van Zee Date: Sat Nov 9 17:18:00 2013 -0600 Added object wrappers to 1f test suite modules. Details: - Added missing object wrappers to level-1f test suite modules. This was only apparent if you were configuring with something other than the reference configuration. - Commented out object-wrappers in level-1f front-ends. These were not working as intended the reference configuration was selected, because most kernel sets, such as those in the template set, do not have object wrappers. - Whitespace changes to template micro-kernels. - Comment changes to template level-1f kernel headers. commit 9ef3752079de10124bed906b5d28479d04aa8187 Author: Field G. Van Zee Date: Fri Nov 8 17:20:47 2013 -0600 Updated template kernels wrt KernelsHowTo wiki. Details: - Merged latest state of KernelsHowTo wiki into template micro-kernels located in config/template/kernels/3. commit 376bbb59c8944e29c5c1ff6637920d8451370afa Author: Field G. Van Zee Date: Fri Nov 8 11:17:34 2013 -0600 Removed support for duplication. Details: - Removed support for duplication from the gemmtrsm/trsm micro-kernels and all framework code. - Updated test suite modules according to above changes. commit 68a5910974b62b4df853fae2a68cb04df9d5a19c Author: Field G. Van Zee Date: Thu Nov 7 11:36:11 2013 -0600 Added comments to testsuite/input.operations. Details: - Added extensive comments to the top of testsuite/input.operations, which describe how to edit the file. - Removed input.operations.0 and input.operations.1. - Changed input.general to test all datatypes ("sdcz") by default. commit a98f78b715fb256a519870071bb5266130d70b21 Author: Field G. Van Zee Date: Wed Nov 6 15:32:47 2013 -0600 Changed dim_t and inc_t to be signed integers. Details: - Redefined dim_t and inc_t in terms of gint_t (instead of guint_t). This will facilitate interoperability with Fortran in the future. (Fortran does not support unsigned integers.) - Redefined many instances of stride-related macros so that they return or use the absolute value of the strides, rather than the raw strides which may now be signed. Added new macros bli_is_row_stored_f() and bli_is_col_stored_f(), which assume positive (forward-oriented) strides, and changed the packm_blk_var[23] variants to use these macros instead of the existing bli_is_row_stored(), bli_is_col_stored(). - Added/adjusted typecasting to to various functions/macros, including bli_obj_alloc_buffer(), bli_obj_buffer_at_off(), and various pointer- related macros in bli_param_macro_defs.h. - Redefined bli_convert_blas_incv() macro so that the BLAS compatibility layer properly handles situations where vector increments are negative. Thanks to Vladimir Sukharev for pointing out this issue. - Changed type of increment parameters in bli_adjust_strides() from dim_t to inc_t. Likewise in bli_check_matrix_strides(). - Defined bli_check_matrix_object(), which checks for negative strides. - Redefined bli_check_scalar_object() and bli_check_vector_object() so that they also check for negative stride. - Added instances of bli_check_matrix_object() to various operations' _check routines. commit 1f8afc3e08a4312cfe810be86aedeacbc57275c5 Author: Field G. Van Zee Date: Wed Nov 6 10:09:10 2013 -0600 Minor comment update to BLAS compat files. commit 1abbf768afafc158d44e4d5c4a135cfd9e277f13 Author: Field G. Van Zee Date: Mon Nov 4 15:50:00 2013 -0600 Fixed bugs in scalv and setv. Details: - Fixed bugs similar to those addressed in cca1e1f51dc6, whereby a segmentation fault may occur if beta is not the same type as the vector operand for scalv and setv. - Changed axpyv and scal2v front-ends in a similar fashion. commit f5953259a1842ee48e5833c22ac86e68a337bfe1 Author: Field G. Van Zee Date: Mon Nov 4 14:43:55 2013 -0600 Fixed a bug related to Hermitian matrix diagonals. Details: - Fixed a bug whereby BLIS assumed that the imaginary components of the diagonal elements of Hermitian matrices were already zero. This property is now enforced when the matrix is packed (bli_packm_blk_var2). Thanks to Vladimir Sukharev for reporting this bug. - Minor comment updates to template kernels. commit d70f2b089dac8b9e4c19295dfa6014c36afee2ec Author: Field G. Van Zee Date: Sat Nov 2 17:19:40 2013 -0500 Added scaling to abval2s, sqrt2s macros. Details: - Re-defined abval2s and sqrt2s macros to use scaling to avoid underflow and overflow from squaring the real and imaginary components. (This is the same technique used to fix recent bugs in invscals/invscaljs and inverts.) commit c5b1ed9409ae2f71d04041eef5da9a0080b5784a Author: Field G. Van Zee Date: Fri Nov 1 10:28:04 2013 -0500 Added new dotxaxpyf variant 2. Details: - Added a new variant for dotxaxpyf that is based on dotxf and axpyf kernels. By default, this variant is not used by any other operation. commit 97f89fbcf202d72fc440b614708e352ea31633e2 Author: Field G. Van Zee Date: Fri Nov 1 10:16:39 2013 -0500 Fixed bug in complex invscals. Details: - Fixed complex inversion in invscals and invscaljs whereby the imaginary component was being computed incorrectly. - Use bli_fmaxabs() instead of bli_fabs() when choosing the scalar in inverts, invscals, and invscaljs. - Changed bli_abs() and bli_fabs() macro definitions to use "<=" operator instead of "<". commit eda42a21d17a2742eab69ab801ed530b82488c8a Author: Field G. Van Zee Date: Thu Oct 31 18:00:44 2013 -0500 Defined missing symbols in bla_rotg.c Details: - Defined local equivalents of libf2c's r_sign(), d_sign(), c_abs(), and z_abs(), which are needed by bla_rotg.c. Also defined r_abs() and d_abs() for completeness. Thanks to Vladimir Sukharev for reporting these bugs. commit cca1e1f51dc67a2c3725d5c1837256831aaf70f8 Author: Field G. Van Zee Date: Wed Oct 30 14:39:01 2013 -0500 Fixed bugs in scalm and setm. Details: - Fixed bugs in scalm and setm that resulted in segmentation faults when beta is not the same type as the matrix operand. Thanks to Vladimir Sukharev for reporting this bug. - Changed axpym and scal2m front-ends in fashion similar to that of scalm and setm; namely, the alpha scalar is copy-cast the type of the first matrix operand. - Changed the template and reference configurations' bli_config.h files so that the number of memory allocator blocks of A and B are set based on BLIS_MAX_NUM_THREADS. - Comment updates to bli_obj.c and variable rename in bla_nrm2.c. commit 2807013a4761c2b84b3944de64d23483ad7ef2fb Author: Field G. Van Zee Date: Thu Oct 24 14:32:20 2013 -0500 Fixed over/under-flow in complex inversion. Details: - Fixed the complex bli_?inverts() macros, which were inverting elements in an "unsafe" manner, such that very large and very small values were unnecessarily over/under-flowing. Thanks for Vladimir Sukharev for reporting this bug. - Comment update to bli_sumsqv_unb_var1.c. - Removed redundant bli_min() macro in bli_scalar_macro_defs.h. - Changed 1.0F to 1.0 for bli_drands() macro. commit 45a80c625f84edb2ade6ac25efe2b9c589d7e0df Author: Field G. Van Zee Date: Wed Oct 23 12:15:25 2013 -0500 Fixed parameter checking issue in BLAS syr[2]k. Details: - Fixed a minor parameter checking bug in the BLAS compatibility layer for [sd]syrk and [sd]syr2k. Specifically, if 'C' is passed in for the trans parameter of either operation, it is (a) allowed, and (b) treated as 'T' (whereas previously it was disallowed). Thanks for Vladimir Sukharev for finding and reporting this bug. commit a091a219bda55e56817acd4930c2aa4472e53ba5 Author: Field G. Van Zee Date: Mon Oct 14 10:11:29 2013 -0500 Minor fixes to piledriver configuration, ukernel. Details: - Applied a patch from Tyler that fixes minor staleness in the piledriver configuration and gemm micro-kernel. - Very minor changes to test suite input files. commit dacdde27aee4fb90b14880136d7f20c6b234e2c6 Author: Field G. Van Zee Date: Fri Oct 11 11:37:19 2013 -0500 Added Fran's Sandy Bridge kernels/configuration. Details: - Added a kernel directory for kernels developed by Francisco Igual for the Sandy Bridge architecture, including a dgemm ukernel coded with AVX intrinsics. - Added a configuration for Sandy Bridge using values supplied by Fran. commit 03106d650e4030d4c9831683448376f92fc52d41 Author: Field G. Van Zee Date: Fri Oct 11 10:40:38 2013 -0500 Fixed minor perf bug in gemm_ker_var2. Details: - Fixed a minor performance bug in bli_gemm_ker_var2.c (and the experimental bli_gemm_ker_var5.c) whereby the addresses for a_next and b_next are not computed correctly (ie: do not wraparound) at the edge cases. Thanks to Tze Meng for helping me identify this bug. commit b053337387dbdef9035be03538222670a21707ca Author: Field G. Van Zee Date: Thu Oct 10 18:26:55 2013 -0500 Added fusing factors, MR/NR to test suite output. Details: - Updated the test suite driver (and modules where appropriate) so that the level-1f fusing factors are output along with the variable dimension. While this is not strictly necessary, since the fusing factors are output in the initial parameter summary, it allows extra reassurance to the user since the fusing factors appear alongside the variable dimension, which together give a complete picture of the problem size. Similar changes were made for outputting the register blocksizes when reporting results for the micro-kernel test modules. commit be4833bd91c5a58d0bfc52daaadf7ba543a77acf Author: Field G. Van Zee Date: Thu Oct 10 14:20:06 2013 -0500 Added test suite modules for level-1f, 3 kernels. Details: - Added test modules in test suite for level-1f kernels and level-3 micro-kernels. (Duplication in the micro-kernels, for now, is NOT supported by these test modules.) - Added section override switches to test suite's input.operations file. - Added obj_t APIs for level-1f front-ends and their unblocked variants to facilitate the level-1f test modules. Also added front-end for dupl operation. - Added obj_t-based check routines for level-1f operations, which are called from the new front-ends mentioned above. - Added query routines for axpyf, dotxf, and dotxaxpyf that return fusing factors as a function of datatype, which is needed by their respective test modules. - Whitespace changes to bli_kernel.h of all existing configurations. commit 680188d46bb15b9a1a2867638104939dc77ca2a1 Author: Field G. Van Zee Date: Thu Oct 10 13:23:37 2013 -0500 Cleaned up old test drivers. Details: - Minor updates to old test drivers in preparation for our participation in ACM TOMS's replicated results initiative. commit 3690bdd4f95769c935c410414112102cc3e108b1 Author: Field G. Van Zee Date: Thu Oct 10 11:45:33 2013 -0500 More updates to level-1f kernels for core2-sse3. Details: - Changed types in function signatures to match new prototypes. Meant to include this in previous commit. commit 661d5120cd7071f9b0c5cefc95f99f1361370ade Author: Field G. Van Zee Date: Thu Oct 10 11:27:27 2013 -0500 Fixed outdated fusing factor macros in 1f kernels. Details: - Updated level-1f kernels for x86_64 and bgq to use renamed fusing factor macros. Meant to include this in 5e54f46c. Thanks to Fran for pointing this out. commit 73aa1e9f31d1b2a319c7e711ced6db3f9835c832 Author: Field G. Van Zee Date: Tue Oct 1 17:01:18 2013 -0500 Added section overrides to test suite. Details: - Added new lines of input to the test suite's input.operations file, which allows the user to disable entire sections (levels) of tests. Before this change, the user had to manually disable each operation tests's "master switch". (This is why input.operations.0 existed: to allow a more convenient starting point for someone who only wanted to test one or a few operations.) commit 5e54f46ccb76beab892d530b693e07c6bf6db7cf Author: Field G. Van Zee Date: Mon Sep 30 12:58:18 2013 -0500 Added template implementations and other tweaks. Details: - Added a 'template' configuration, which contains stub implementations of the level 1, 1f, and 3 kernels with one datatype implemented in C for each, with lots of in-file comments and documentation. - Modified some variable/parameter names for some 1/1f operations. (e.g. renaming vector length parameter from m to n.) - Moved level-1f fusing factors from axpyf, dotxf, and dotxaxpyf header files to bli_kernel.h. - Modifed test suite to print out fusing factors for axpyf, dotxf, and dotxaxpyf, as well as the default fusing factor (which are all equal in the reference and template implementations). - Cleaned up some sloppiness in the level-1f unb_var1.c files whereby these reference variants were implemented in terms of front-end routines rather that directly in terms of the kernels. (For example, axpy2v was implemented as two calls to axpyv rather than two calls to AXPYV_KERNEL.) - Changed the interface to dotxf so that it matches that of axpyf, in that A is assumed to be m x b_n in both cases, and for dotxf A is actually used as A^T. - Minor variable naming and comment changes to reference micro-kernels in frame/3/gemm/ukernels and frame/3/trsm/ukernels. commit 97aaf220a847363b4da35935eca17790c0ef71f6 Author: Field G. Van Zee Date: Tue Sep 17 10:51:36 2013 -0500 Added new kernels, configurations. Details: - Added various micro-kernels for the following architectures: Intel MIC IBM BG/Q IBM Power7 AMD Piledriver Loogson 3A and reorganized kernels directory. Thanks to Tyler Smith, Mike Kistler, and Xianyi Zhang for contributing these kernels. - Added configurations corresponding to above architectures, and renamed "clarksville" configuration to "dunnington". commit fe979c5a114c877506a5697cdab1fc8cf2bcd303 Author: Field G. Van Zee Date: Fri Sep 13 14:31:53 2013 -0500 Removed default configuration behavior. Details: - Changed the configure script so that it no longer defaults to the reference configuration. This change is being made so that the developer has a firm awareness of which configuration is being used to configure BLIS. Thanks to Mike Kistler and Bryan Marker for this suggested change. commit da77e9614f54f92f703f01e3b9bd67a83280150c Author: Field G. Van Zee Date: Fri Sep 13 12:00:37 2013 -0500 Minor improvements to static memory allocator. Details: - Expanded on cpp macro definitions from bli_mem.c and relocated them to a new header file, frame/include/bli_mem_pool_macro_defs.h. The expanded functionality includes computing the pool size for each datatype (using that datatype's cache blocksizes) and using the maximum to size the actual pool array. This addresses the somewhat common pitfall whereby a developer updates cache blocksizes in bli_kernel.h for only one datatype (say, single-precision real), while the memory pools are sized using the double-precision real values. Then, when the developer attempts to link to and run a level-3 BLIS routine (e.g. dgemm), the library aborts with a message saying the static memory pool was exhausted. Clearly, this message is misleading when the pool was not sized properly to begin with. - Removed previously disabled code in bli_kernel_macro_defs.h that was meant to check for size consistency among the various cache blocksizes. (Obviously the memory pool size-based solution mentioned above is better.) - Added BLIS_SIZEOF_? cpp macros to bli_type_defs.h. This seemed like a reasonable place to put these constants, rather than further crowd up bli_config.h. - Updated testsuite driver to output memory pool sizes for A, B, and C. - Minor comment updates to bli_config.h. - Removed 'flame' configuration. It was beginning to get out-of-date, and I hadn't used it in months. We can always re-create it later. commit 631f347b7a99cb02757c534fd3ec5f723a2fdb0e Author: Field G. Van Zee Date: Tue Sep 10 17:17:28 2013 -0500 Added ESSL and Accelerate targets to test drivers. Details: - Added ESSL and Accelerate (OS X) targets to standalone test drivers' Makefile in "test" directory. Thanks to Jeff Hammond for suggesting / providing this patch. commit 7ae4d7a41d13ef5f1ceee217c000a5cf77a11128 Author: Field G. Van Zee Date: Tue Sep 10 16:35:12 2013 -0500 Various changes to treatment of integers. Details: - Added a new cpp macro in bli_config.h, BLIS_INT_TYPE_SIZE, which can be assigned values of 32, 64, or some other value. The former two result in defining gint_t/guint_t in terms of 32- or 64-bit integers, while the latter causes integers to be defined in terms of a default type (e.g. long int). - Updated bli_config.h in reference and clarksville configurations according to above changes. - Updated test drivers in test and testsuite to avoid type warnings associated with format specifiers not matching the types of their arguments to printf() and scanf(). - Inserted missing #include "bli_system.h" into blis.h (which was slated for inclusion in d141f9eeb6d1). - Added explicit typecasting of dim_t and inc_t to macros in bli_blas_macro_defs.h (which are used in BLAS compatibility layer). - Slight changes to CREDITS and INSTALL files. - Slight tweaks to Windows build system, mostly in the form of switching to Windows-style CRLF newlines for certain files. commit 068437736b41d51a1f5ec47839f059bf58a20413 Author: Field G. Van Zee Date: Mon Sep 9 14:07:58 2013 -0500 Fixed set-but-not-used compiler (gcc) warnings. Details: - Used void-casts of certain variables to appease gcc (and perhaps other compilers) when such variables are only used in the complex instances of the functions. Special thanks to Karl Rupp for suggesting a portable fix for these warnings. commit 6dc85f63dcd5282340c9e00d585e97d70a21edc3 Author: Field G. Van Zee Date: Mon Sep 9 13:48:52 2013 -0500 Small fix to Windows defs.mk makefile fragment. Details: - Commented out a !include statement that was attempting to include a version file that does not yet exist. For now, the version string is hard-coded into defs.mk. commit d141f9eeb6d1de7044b7429adf52d11c6fca620c Author: Field G. Van Zee Date: Mon Sep 9 13:09:16 2013 -0500 Added Windows build system. Details: - Added a 'windows' directory, which contains a Windows build system similar to that of libflame's. Thanks to Martin for getting this up and running. - Spun off system header #includes into bli_system.h, which is included in blis.h - Added a Windows section to bli_clock.c (similar to libflame's). commit 9b320e7406fb69e8b61a0085abe2ed89a96bdb68 Author: Field G. Van Zee Date: Mon Sep 9 11:04:46 2013 -0500 Edited bli_?lamch.c to avoid Windows keyword. Details: - Renamed "small" variable to "smnum" to avoid collision with Windows type by the same name. This change is needed in advance of the upcoming Windows build system. commit 9013ad6ff2e9ace35e0cf44c32795c2f3d5be628 Author: Field G. Van Zee Date: Wed Sep 4 13:36:07 2013 -0500 Switched integer typedefs (again) to C types. Details: - Redefined gint_t and guint_t in terms of the standard C types long int and unsigned long int, respectively. - Changed testsuite default max problem size to 500. - Changed testsuite input.operations to use square problems for level-3 operation tests. commit 981a60cfa07abac2e93697dfe12b0f076ab00a38 Author: Field G. Van Zee Date: Wed Sep 4 12:09:11 2013 -0500 Falling back to 32-bit integers for dim_t, etc. Details: - In light of recent segfaulting issues when compiling on 32-bit systems, I've changed the default typedef for gint_t and guint_t from int64_t and uint64_t to int32_t and uint32_t, respectively. - Disabled 64-bit integers in the blas2blis layer for the reference configuration. - Added type sizes of gint_t, guint_t, and the four floating-point datatypes to introductory output of the testsuite. commit b776ddcd4338b34f172ef78da0ac1d771a771ab4 Author: Field G. Van Zee Date: Tue Sep 3 21:58:07 2013 -0500 Applied temp fix to typecasting bug in testsuite. Details: - Applied a temporary fix to the typecasting bug in the testsuite driver. The fix involves casting both numerator and denominator to unsigned long. This fix is more voodoo than science, as I can't be sure why it even works. commit 9ee6e125373869c4213c017ce772c38ecefba103 Author: Field G. Van Zee Date: Tue Sep 3 21:53:27 2013 -0500 Changed dimension spec for gemm in testsuite. Details: - Encounted a bizarre typecasting bug whereby the test suite was not computing the proper dimension from the problem size and dimension specification when the latter was set to -3. Will investigate. Thanks to Fran for finding this "bug". commit e8be081e68c385ab44d0fea8dade21d40c200b79 Author: Field G. Van Zee Date: Wed Aug 28 15:52:34 2013 -0500 Generalized matlab and file output in testsuite. Details: - Added a new option in input.general that allows outputting in matlab/octave format so that one can output in matlab format independently from outputting to files. - Adjusted input.operations according to above. - Added input.operations.0 and input.operations.1 with all options disabled and enabled, respectively. commit d352c746e5683037d41b5061dfb5ce08e1d0843b Author: Field G. Van Zee Date: Tue Aug 27 13:41:46 2013 -0500 Added single/real gemm micro-kernel for x86_64. Details: - Added a single-precision real gemm micro-kernel in kernels/x86_64/3/bli_gemm_opt_d4x4.c. - Adjusted the single-precision real register blocksizes in config/clarksville/bli_kernel.h to be 8x4. - Added a missing comment to bli_packm_blk_var2.c that was present in bli_packm_blk_var3.c commit dedda523dc5dc779ecc34e6a03dc74cb8eb220de Author: Field G. Van Zee Date: Mon Aug 19 12:07:41 2013 -0500 Fixed bug in bli_acquire_mpart_t2b(), _l2r(). Details: - Fixed a bug in bli_acquire_mpart_t2b() and bli_acquire_mpart_l2r() that cause incorrect partitioning when SUBPART0 was requested. This bug was introduced in 46d3d09d49ad. Thanks to Bryan for isolating this bug. - Removed dupl kernels from kernels/x86_64/3 directory. - Uncommented beta == 0 optimizaition code in kernels/x86_64/3/bli_gemm_opt_d4x4.c. commit 12dbd2f33455e9384fe2070cbdd660fd4a7fceb5 Author: Field G. Van Zee Date: Thu Aug 8 14:39:35 2013 -0500 Moved init_safe(), finalize_safe() to BLAS compat. Details: - Moved the bli_init_safe() and bli_finalize_safe() function calls from the BLAS-like BLIS layer to the BLAS compatibility layer. Having these auto- initializers in the BLIS layer wasn't buying us anything because the user could still call the library with uninitialized global scalar constants, for example. Thus, we will just have to live with the constraint that bli_init() MUST be called before calling ANY routine with a bli_ prefix. - Added the missing _init_safe() and finalize_safe() calls to the level-1 BLAS compatibility wrappers. commit 8abfe55f2ae5d89df18e1b26a5a28d94b0936683 Author: Field G. Van Zee Date: Thu Aug 8 13:30:19 2013 -0500 Miscellaneous updates. Details: - Changed the BLIS_HEAP_STRIDE_ALIGN_SIZE in the configurations from 16 to BLIS_CACHE_LINE_SIZE (typically 64). - Changed the use of nr in sizing of bd buffer to packnr in level-3 macro- kernels. - Reformulated gemm_ker_var2 to look more like the other level-3 macro- kernels, in that the interior and edge-case handling is expressed once inside the loops in the n and m dimensions, rather than the edge-case handling being "unrolled" and expressed as distinct code regions. The previous macro-kernel now lives in retired form in the subdirectory other/bli_gemm_ker_var2.c.old. - Updated experimental gemm_ker_var5 according to above change. - Fixed bug in bli_her2k.c whereby incorrect transformations were being applied to optimize the macro-kernel accesses pattern on C when C is row-stored. - Various updates inside of test/exec_sizes. commit 1aa05736ff49e7cc5f121acf615460fe9a87852c Author: Field G. Van Zee Date: Wed Aug 7 12:27:04 2013 -0500 Fixed bug in interface of bla_ger_check(). Details: - Fixed the misplaced lda parameter in the function signature of bla_ger_check(). Thanks to Tyler for finding this bug. commit 685aad25353fb200de4ca97a8bc0feeebde51d0f Author: Field G. Van Zee Date: Tue Aug 6 12:25:51 2013 -0500 Fixed cpp guard typos in frame/compat/check files. Details: - Fixed instances of BLIS_ENABLE_BLIS2BLAS that should have been BLIS_ENABLE_BLAS2BLIS. Thanks to Tyler for catching this. - Fixed various syntax errors in the code that had yet to be compiled due to the aforementioned bug. commit f4ec28e723d28d998f1038f82da6986e44320ef6 Author: Field G. Van Zee Date: Thu Aug 1 11:24:23 2013 -0500 Added basic OpenMP-based gemm and packm files. Details: - Integrated Tyler's parallelized packm_blk_var2 and gemm_ker_var2 into the following auxiliary files frame/1m/packm/other/bli_packm_blk_var2.c frame/3/gemm/other/bli_gemm_ker_var2.c The routine in the first file uses a basic OpenMP parallel region to parallelize the packing of blocks of A and panels of B, while the second uses a similar parallel region to parallelize along the n dimension of the gemm macro-kernel. commit f8980edf9c318453bb1962ac4939c06bf11e6d5e Merge: 67a8b949 6e7e4523 Author: Field G. Van Zee Date: Fri Jul 26 11:14:27 2013 -0500 Merge branch 'master' of https://code.google.com/p/blis commit 67a8b9498d13b038deb316ac163e62c5b17da2ec Author: Field G. Van Zee Date: Fri Jul 26 11:12:37 2013 -0500 Added missing cpp kernel blocksize constraints. Details: - Added missing C preprocessor guards in bli_kernel_macro_defs.h that enforce constraints on the register blocksizes relative to the cache blocksizes. Thanks to Tyler for helping me stumble across this issue. commit 6e7e452343014e8f86640874dc1dbadca4a642a1 Author: Field G. Van Zee Date: Mon Jul 22 14:50:57 2013 -0500 Fixed minor warnings and misc issues. Details: - Fixed various warnings output by gcc 4.6.3-1, including removing some set-but-not-used variables and addressing some instances of typecasting of pointer types to integer types of different sizes. commit 03f6c3599743bc837a7d40eb5b415b1bf4f2a4e9 Author: Field G. Van Zee Date: Mon Jul 22 12:54:32 2013 -0500 Tightened some macros that detect datatypes. Details: - Modified the definitions of some macros, such as bli_is_real(), so that the "special" bit is taken into account so that BLIS_INT is differentiated from BLIS_FLOAT. - Whitespace changes to bli_obj_macro_defs.h. - Removed BLIS_SPECIAL_BIT definition from bli_type_defs.h, since it wasn't being used. commit b33e2f4443b9043b554963320280ff7783773652 Author: Field G. Van Zee Date: Fri Jul 19 17:15:03 2013 -0500 CHANGELOG update (for 0.0.9). commit 0680916fdd532f7a4716b11a2515243b2c08d00f Author: Field G. Van Zee Date: Thu Jul 18 18:04:34 2013 -0500 Added BLAS error checking to compatibility layer. Details: - Added frame/compat/check directory, which now houses companion _check() routines for each of the BLAS wrappers in frame/compat. These _check() routines are called from the compatibility wrappers and mimic the error-checking present in the netlib BLAS. - Edited bla_xerbla.c so that xerbla() translates the operation string to uppercase before printing. - Redefined util routines in frame/compat/f2c/util in terms of level0 macros. - Added prototypes for util routines, f2c routines, lsame(), and xerbla(). - Commented out prototypes in test/test_*.c since Fortran integers are now int64_t by default (and the prototypes that were present in the files used int). - Removed redundant #include "bli_f2c.h" in bli_?lamch.c and bli_lsame.c, since blis.h was already being included. - Other minor changes to code in frame/compat/f2c. commit 4e80ad28c97273db3366428ec44020da7944964d Author: Field G. Van Zee Date: Thu Jul 18 17:53:31 2013 -0500 Added support for C99 complex types/arithmetic. Details: - Added support for C99 complex types to bli_type_defs.h and overloaded complex arithmetic to the scalar-level macros in include/level0. This includes a somewhat substantial reorganization and re-layering of much of the existing machinery present in the level0 macros. - Added new #define for BLIS_ENABLE_C99_COMPLEX to bli_config.h files, commented-out by default, which optionally enables the use of built-in C99 complex types and arithmetic. - Minor changes to clarksville and reference configs' make_defs.mk files. - Removed macro definitions from bli_param_macro_defs.h which was not being used (bli_proj_dt_to_real_if_imag_eq0). commit 6072d7c848e837ba20d607f7b727438ada31bdcf Author: Field G. Van Zee Date: Wed Jul 17 12:27:45 2013 -0500 Fixed bugs in trsm, trmm macro-kernels. Details: - Fixed a bug in trsm_rl_ker_var2() caused by incorrect edge case handling. - Fixed a bug in trsm_rl_ker_var2() and trsm_ru_ker_var2() whereby k was incorrectly being adjusted upward by MR, instead of NR. The rl and ru trmm macro-kernels were updated in a similar fashion. - Fixed a bug in trsm_ru_ker_var2() that was due to a missing negation on diagoffb when recomputing k to skip a zero region below where the diagonal intersects the right side of the block. The corresponding trmm macro-kernel was also updated. - Fixed a bug in trsm_ru_ker_var2() where the the adjustment of k (by NR) needed to be placed AFTER the block that recomputes k to skip the zero region (if present). The other three trsm macro-kernels, as well as the trmm macro-kernels, were updated in the same manner, for consistency. - Fixed a bug in trmm_lu_ker_var2() in which the wrong dimension (n) was being updated to skip a zero region to the left of where the diagonal of A intersects the top edge of the block. - Comment updates to all trsm and trmm macro-kernels. - Comment updates to bli_packm_init.c. commit 47410a48f9b91e94ce4c67633686ffd1f2ad0275 Author: Field G. Van Zee Date: Wed Jul 10 14:53:59 2013 -0500 Added f2c'ed Givens rotation wrappers. Details: - Retired (for now) existing ?rot*() BLAS compatibility wrappers to 'attic' along with other wrappers for which no BLIS implementation exists. - Added f2c-generated codes for applicable datatype flavors of rot, rotg, rotm, and rotmg operations. commit e5f90f3a8dbe671104bcb9d8b4e3409de01805da Author: Field G. Van Zee Date: Wed Jul 10 13:40:12 2013 -0500 Removed copynz defs from bli_kernel.h files. Details: - Removed COPYNZ_KERNEL definition from the bli_kernel.h files in each configuration. (Meant to include this in previous commit.) commit aec12d90f596e8c04b1ad178258a1cd38108f59d Author: Field G. Van Zee Date: Wed Jul 10 13:33:30 2013 -0500 Removed copynzv, copynzm and related codes. Details: - Removed copynzv and copynzm operation directories. These operations implemented a variation of copyv/m that, in the case of real source and complex destination operands, leaves the imaginary component untouched (rather than setting it to zero). I realize now that the special case(s) (e.g. gemm with real A and B but complex C) that I thought required this operation actually can be handled more simply. - Removed level0 scalar macros implementing copynzs, copynzjs. commit b0a0a0f274a761788531b5d281cc3b411b7124ed Author: Field G. Van Zee Date: Tue Jul 9 17:15:38 2013 -0500 Added handling of restrict, stdint.h for non-C99. Details: - Removed the #include from blis.h and inserted a cpp macro block in bli_type_defs.h that #includes for C++ and C99, and otherwise manually typedefs the types we need (which, for now, are unconditionally int64_t and uint64_t). - Moved basic typedefs to top of bli_type_defs.h, and comment changes. - Added cpp macro block to bli_macro_defs.h that #defines restrict as nothing for C++ and non-C99. commit 4b7e7970f1af4a1ab121e07657e2b78b9fcd7671 Author: Field G. Van Zee Date: Mon Jul 8 15:20:34 2013 -0500 Migrated integer usage to stdint.h types. Details: - Changed the way bli_type_defs.h defines integer types so that dim_t, inc_t, doff_t, etc. are all defined in terms of gint_t (general signed integer) or guint_t (general unsigned integer). - Renamed Fortran types fchar and fint to f77_char and f77_int. - Define f77_int as int64_t if a new configuration variable, BLIS_ENABLE_BLIS2BLAS_INT64, is defined, and int32_t otherwise. These types are defined in stdint.h, which is now included in blis.h. - Renamed "complex" type in f2c files to "singlecomplex" and typedef'ed in terms of scomplex. - Renamed "char" type in f2c files to "character" and typedef'ed in terms of char. - Updated bla_amax() wrappers so that the return type is defined directly as f77_int, rather than letting the prototype-generating macro decide the type. This was the only use of GENTFUNC2I/GENTPROT2I-related macros, so I removed them. Also, changed the body of the wrapper so that a gint_t is passed into abmaxv, which is THEN typecast to an f77_int before returning the value. - Updated f2c code that accessed .r and .i fields of complex and doublecomplex types so that they use .real and .imag instead (now that we are using scomplex and dcomplex). commit 372501398564fdba3d5a3db86c30bc1039b185ff Author: Field G. Van Zee Date: Mon Jul 8 11:24:18 2013 -0500 Added experimental bli_gemm_ker_var5(). Details: - Added support for an experimental gemm macro-kernel incrementally packs one micro-panel of B at a time. This is useful for certain special cases of gemm where m is small. - Minor changes to default values of clarksville configuration. - Defined BLIS_PACKED_BLOCKS as part of pack_t type, even though we do not yet have any use (or implementation support) for block storage. - Comment update to bli_packm_init.c. commit 9915d667a79f23e3a2a2516247c560e9063a1646 Author: Field G. Van Zee Date: Sun Jul 7 13:28:39 2013 -0500 Defined "total" blocksize query functions. Details: - Defined bli_blksz_total_for_type() and bli_blksz_total_for_obj() to query the default blocksize plus blocksize extension (using the type or the type of an object). - Comment update in bli_packm_cxk.c. commit 46d3d09d49aded1d9f1b468c83fce75e07d631dc Author: Field G. Van Zee Date: Thu Jun 27 13:19:56 2013 -0500 Consolidated lower/upper her[2]k blocked variants. Details: - Consolidated lower and upper blocked variants for herk and her2k, and renamed the resulting variants, according to the same changes recently made to trmm and trsm. - Implemented support for four new subpartitions types: BLIS_SUBPART1T BLIS_SUBPART1B BLIS_SUBPART1L BLIS_SUBPART1R which correspond to "merged" partitions that include the middle "1" partition as well as either the neighboring "0" or "2" partition. This is used to clean up code in herk/her2k var2 that attempts to partition away the strictly zero region above or below the diagonal of a matrix operand that is being marched through diagonally. - Added safeguards to herk macro-kernels that skip any leading or trailing zero region in the panel of C that is passed in. This is now needed given that herk/her2k var1 no longer partitions off this zero region before calling the macro-kernel (via bli_her[2]k_int()). - Updated comments and other whitespace changes to trmm/trsm macro-kernels. commit 02002ef6f3d2746665982793db36714bd69bccc9 Author: Field G. Van Zee Date: Mon Jun 24 17:08:14 2013 -0500 Added row-storage optimizations for trmm, trsm. Details: - Implemented algorithmic optimizations for trmm and trsm whereby the right side case is now handled explicitly, rather than induced indirectly by transposing and swapping strides on operands. This allows us to walk through the output matrix with favorable access patterns no matter how it is stored, for all parameter combinations. - Renamed trmm and trsm blocked variants so that there is no longer a lower/upper distinction. Instead, we simply label the variants by which dimension is partitioned and whether the variant marches forwards or backwards through the corresponding partitioned operands. - Added support for row-stored packing of lower and upper triangular matrices (as provided by bli_packm_blk_var3.c). - Fixed a performance bug in bli_determine_blocksize_b() whereby the cache blocksize extensions (if non-zero) were not being used to appropriately size the first iteration (ie: the bottom/right edge case). - Updated comments in bli_kernel.h to indicate that both MC and NC must be whole multiples of MR AND NR. This is needed for the case of trsm_r where, in order to reuse existing left-side gemmtrsm fused micro-kernels, the packing of A (left-hand operand) and B (right-hand operand) is done with NR and MR, respectively (instead of MR and NR). commit d1e81ddc848ee47bc188735883d14582bdd0cabc Author: Field G. Van Zee Date: Thu Jun 13 11:14:21 2013 -0500 Minor generalizing tweaks to trmm blk var1, var2. commit 0efb7974f104206ba3985276f2180a9b14fe9f9b Author: Field G. Van Zee Date: Wed Jun 12 16:40:04 2013 -0500 CHANGELOG update. commit 5b641c3bab31eac6a1795b9f6e3f86c59651ca50 Author: Field G. Van Zee Date: Wed Jun 12 16:02:12 2013 -0500 Use separate CFLAGS for "kernels" directories. Details: - Added a new "special" directory type: any source code within directories named "kernels" will be compiled with a separate CFLAGS_KERNELS set of compiler flags. This allows the developer to specify a separate set of flags (e.g. optimization flags) for compiling kernels while maintaining a standard set for regular framework code. - Fixed a bug in the top-level Makefile that was causing "noopt" code to be compiled with the standard set of compilation flags. - Updated make_defs.mk in reference, flame, and clarksville configurations according to above changes. commit 08475e7c7653ba598665071a617d10f0d8f763c2 Author: Field G. Van Zee Date: Tue Jun 11 12:18:39 2013 -0500 Various level-3 optimizations for row storage. Details: - Implemented remaining two cases within bli_packm_blk_var2(), which allow packing from a lower or upper-stored symmetric/Hermitian matrix to column panels (which are row-stored). Previously one could only pack to row panels (which are column-stored). - Implemented various optimizations in the level-3 front-ends that allow more favorable access through row-stored matrices for gemm, hemm, herk, her2k, symm, syrk, and syr2k. - Cleaned up code in level-3 front-ends that has to do with setting target and execution datatypes. commit 05a657a6b92e8d34efa5c57ae6a18a4f35ec0841 Author: Field G. Van Zee Date: Fri Jun 7 11:04:10 2013 -0500 Added beta == 0 optimization to x86_64 ukernel. Details: - Modified x86_64 gemm microkernel so that when beta is zero, C is not read from memory (nor scaled by beta). - Fixed minor bug in test suite driver when "Test all combinations of storage schemes?" switch is disabled, which would result in redundant tests being executed for matrix-only (e.g. level-1m, level-3) operations if multiple vector storage schemes were specified. - Restored debug flags as default in clarksville configuration. commit f1aa6b81cc421516dd77dd0f18f7c432724e6ef2 Author: Field G. Van Zee Date: Thu Jun 6 13:36:06 2013 -0500 Whitespace changes to old test drivers. Details: - Replaced tabs with four spaces in places where indention was already in place. commit 9feb4c23d2e36f3d8b5417a3802c69f94b29f749 Author: Field G. Van Zee Date: Tue Jun 4 14:57:46 2013 -0500 Fixed unaligned handling in axpyf, dotxaxpyf. Details: - Fixed over-cautious handling of unaligned operands in vector instrinsic implementation of axpyf kernel. - Fixed over- and under-cautious handling of unaligned operands in vector intrinsic implementation of dotxaxpyf kernel. commit 22b06cfcd2e3205c8325a246c2279e4b1047c066 Author: Field G. Van Zee Date: Mon Jun 3 16:54:52 2013 -0500 Updated level-1/-1f [vector intrinsic] kernels. Details: - Updated level-1/-1f kernels so that non-unit and un-aligned cases are handled by reference implementation (rather than aborted). - Added -fomit-frame-pointer to default make_defs.mk for clarksville configuration. - Defined bli_offset_from_alignment() macro. - Minor edits to old test drivers. commit 0288c827d3659bb225ac9c10f168b623ed0106a2 Author: Field G. Van Zee Date: Sat Jun 1 08:02:23 2013 -0500 Updated ukernels for x86_64. Details: - Tweaked micro-kernels and configuration for clarksville. - Updated/cleaned up old test drivers in test directory. - Fixed syntax bug in trsv_unb_var1 and trsv_unf_var1 (introduced recently). commit 85a6d1c9a52c2b27c71a3a3e341c51d7ba263749 Author: Field G. Van Zee Date: Mon May 6 11:05:08 2013 -0500 Replaced axpys usage with subs in trsv. Details: - Replaced instances of axpys with alpha equal to -1 with subs. - Use BLIS_MAX_TYPE_SIZE to define BLIS_CONSTANT_SLOT_SIZE instead of sizeof(dcomplex). commit 2d9c667f3c48a12cab64e5ad09d5fcb9f4c19d78 Author: Field G. Van Zee Date: Fri May 24 16:28:10 2013 -0500 Fixed x86_64 kernel bugs and other minor issues. Details: - Fixed bugs in trmv_l and trsv_u due to backwards iteration resulting in unaligned subpartitions. We were already going out of our way a bit to handle edge cases in the first iteration for blocked variants, and this was simply the unblocked-fused extension of that idea. - Fixed control tree handling in her/her2/syr/syr2 that was not taking into account how the choice of variant needed to be altered for upper-stored matrices (given that only lower-stored algorithms are explicitly implemented). - Added bli_determine_blocksize_dim_f(), bli_determine_blocksize_dim_b() macros to provide inlined versions of bli_determine_blocksize_[fb]() for use by unblocked-fused variants. - Integrated new blocksize_dim macros into gemv/hemv unf variants for consistency with that of the bugfix for trmv/trsv (both of which now use the same macros). - Modified bli_obj_vector_inc() so that 1 is returned if the object is a vector of length 1 (ie: 1 x 1). This fixes a bug whereby under certain conditions (e.g. dotv_opt_var1), an invalid increment was returned, which was invalid only because the code was expecting 1 (for purposes of performing contiguous vector loads) but got a value greater than 1 because the column stride of the object (e.g. rho) was inflated for alignment purposes (albeit unnecessarily since there is only one element in the object). - Replaced some old invocations of set0 with set0s. - Added alpha parameter to gemmtrsm ukernels for x86_64 and use accordingly. - Fixed increment bug in cleanup loop of gemm ukernel for x86_64. - Added safeguard to test modules so that testing a problem with a zero dimension does not result in a failure. - Tweaked handling of zero dimensions in level-2 and level-3 operations' internal back-ends to correctly handle cases where output operand still needs to be scaled (e.g. by beta, in the case of gemm with k = 0). commit d57ec42b34f8447c88adeffa95cf22f8c115ad51 Author: Field G. Van Zee Date: Fri May 3 17:35:32 2013 -0500 Renamed _trans_status() macro. Details: - Mistakenly forgot to rename the _trans_status() macro and instances in previous commit. commit 9e2b227866af429a4a6fb7dbb8c457bbdda2f136 Author: Field G. Van Zee Date: Fri May 3 17:24:58 2013 -0500 Renamed _set_trans(), _trans_status() macros. Details: - Renamed the following macros: bli_obj_set_trans() -> bli_obj_set_onlytrans() bli_obj_trans_status() -> bli_obj_onlytrans_status() to remove ambiguity as to which bits are read/updated. commit 2f8174509ea9f844db11ebd9389de5168e85b132 Author: Field G. Van Zee Date: Wed May 1 15:06:30 2013 -0500 Unconditionally check memory pool(s) for errors. Details: - Changed bli_mem_acquire_m() in bli_mem.c so that we still check if the memory pool is exhausted before checking out and returning a block, even if BLIS error checking has been disabled. These errors are useful because they likely indicate that BLIS was improperly configured for the code being run. commit 75405a2b83679b6aff38d7e7425199d623a7b0a9 Author: Field G. Van Zee Date: Wed May 1 15:00:30 2013 -0500 CHANGELOG update. commit 6bfa96f84887dec0b4cf8be5d38dd634c2f8951d Author: Field G. Van Zee Date: Tue Apr 30 19:35:54 2013 -0500 Absorbed blocksize extensions into main objects. Details: - Revamped some parts of commit b6ef84fad1c9 by adding blocksize extension fields to the blksz_t object rather than have them as separate structs. - Updated all packm interfaces/invocations according to above change. - Generalized bli_determine_blocksize_?() so that edge case optimization happens if and only if cache blocksizes are created with non-zero extensions. - Updated comments in bli_kernel.h files to indicate that the edge case blocksize extension mechanism is now available for use. commit bc7c8005cedbe50961ac2a99aeeabf4e9f9a8e9e Author: Field G. Van Zee Date: Thu Apr 25 17:16:59 2013 -0500 Added option to disable err checking in testsuite. Details: - Added a new line to input.general that allows one to specify the error- checking level to use for each BLIS experiment. The only two levels supported for now are "no error checking" and "full error checking". commit 096b366ddcfe386f44419ef84d8df8be13825f86 Author: Field G. Van Zee Date: Thu Apr 25 16:43:43 2013 -0500 Use cntl trees that block in n dimension. Details: - Updated _cntl.c files for each level-3 operation to induce blocked algorithms that first paritition in the n dimension with a blocksize of NC. Typically this is not an issue since only very large problems exceed that of NC. But developers often run very large problems, and so this extra blocking should be the default. - Removed some recently introduced but now unused macros from bli_param_macro_defs.h. commit b6e24b23cb4dfc488c1c9c70d596539c2287f72e Author: Field G. Van Zee Date: Thu Apr 25 12:06:12 2013 -0500 Use PASTEMAC in macro-kernels (over MAC2 or MAC3). Details: - Replaced multi-type invocations of copys_mxn, xpbys_mxn, etc. (PASTEMAC2 and PASTEMAC3) with those that only use a single type (PASTEMAC). - Added extra macros to bli_adds_mxn_uplo.h and bli_xpbys_mxn_uplo.h to accommodate above change. - Fixed comment typo in bli_config.h files. - Added .nfs* pattern to .gitignore. commit df80acf517dde180ddcc5835c6136b2fa7556d4b Author: Field G. Van Zee Date: Tue Apr 23 19:43:23 2013 -0500 Fixed computation of b_next in L3 macro-kernels. Details: - Restructured herk_l and herk_u macro-kernels in the imagine of trmm and trsm, in that the edge cases are captured by the main loop, rather than trying to have "cleanup" sections that result in four distinct parts (interior, bottom edge, right edge, bottom-right edge) of the code. - Fixed the way b_next was being computed in the non-gemm level-3 macro-kernels (herk, trmm, trsm). The way they are computed now matches that of gemm. commit 3671528cf8efe4b445d196665143a5c50c2c6048 Author: Field G. Van Zee Date: Tue Apr 23 19:12:14 2013 -0500 Fixed minor bug in computing b_next in gemm. commit db072a5b4a039a9a668ef951333ecfb5bd3a74b9 Author: Field G. Van Zee Date: Tue Apr 23 17:49:10 2013 -0500 Fixed rare edge case bug in herk_l macro-kernel. Details: - Fixed a potential bug in herk_l at the m_left edge case. If MR was chosen to be much larger than NR, then one could encounter edge cases in the the MC dimension that fall entirely below the diagonal, which the previous implementation of the herk_l macro-kernel was not allowing for. commit 1dab11e37d1cb403cbe75b73a644c00de534f104 Author: Field G. Van Zee Date: Tue Apr 23 17:17:11 2013 -0500 Updated x86 gemmtrsm ukernels to use alpha. commit 9d10d7dd9bc92a993fea7162bfa5983f75506f49 Author: Field G. Van Zee Date: Tue Apr 23 16:00:18 2013 -0500 Added a_next, b_next arguments to micro-kernels. Details: - Added two more arguments to the gemm and gemmtrsm microkernels: the addresses of the next micro-panels of A and B. By passing these pointers into the micro-kernel, we allow the micro-kernel author to prefetch micro-panels of A and B as necessary (though this is completely optional; these addresses may also be safely ignored). - Updated all seven macro-kernels so that they compute and pass in a_next and b_next. Note that ONLY the gemm macro-kernel computes a_next and b_next with the precise semantics we want. I will go back and fix the other macro-kernels in the near future. - Added 'restrict' to various micro-kernels from which it was missing. commit f3815dc84d385c514a5acaf1e925424a57be2f51 Author: Field G. Van Zee Date: Tue Apr 23 11:12:33 2013 -0500 Added code for backward edge-case blocking. Disabled: - Edited bli_determine_blocksize_b() to include experimental (and currently disabled) code that computes extended blocks. - Updated commnts relate to above changes. - Enabled use of x86 gemmtrsm ukernel in config/flame/bli_kernel.h. commit 4fe1435f20e8fc7dd72f795ac58c8e236e6c631b Author: Field G. Van Zee Date: Mon Apr 22 19:00:43 2013 -0500 Updated dupl implementation to use PACKNR and NR. Details: - Updated frame/util/dupl/bli_dupl_unb_var1.c to utilize PACKNR and NR explicitly so navigate b1 so that situations where PACKNR > NR are supported. - Moved the 4x2 and 4x4 reference micro-kernels in frame/3/gemm/ukernels and frame/3/trsm/ukernels to kernels/c99/. - Updated clarksville and flame configurations. commit 2d6f9e83799a46d52d7901e275f8fd67f0a0edc6 Author: Field G. Van Zee Date: Sun Apr 21 15:10:34 2013 -0500 Disabled blocksize checks for memory pools. Details: - Temporarily disabled checks that ensure that enough memory will be allocated by the contiguous memory allocator for all types, given that the values for double precision real are the ones used to allocate the space. These checks can easily go awry in certain situations, especially if you are developing for only one datatype. So for now, they are probably more trouble than they are worth. commit b6ef84fad1c9884c84b7f1350a0bcdfe1737e8f2 Author: Field G. Van Zee Date: Sun Apr 21 15:00:24 2013 -0500 Allow ldim of packed micro-panels != MR, NR. Details: - Made substantial changes throughout the framework to decouple the leading dimension (row or column stride) used within each packed micro-panel from the corresponding register blocksize. It appears advantageous on some systems to use, for example, packed micro-panels of A where the column stride is greater than MR (whereas previously it was always equal to MR). - Changes include: - Added BLIS_EXTEND_[MNK]R_? macros, which specify how much extra padding to use when packing micro-panels of A and B. - Adjusted all packing routines and macro-kernels to use PACKMR and PACKNR where appropriate, instead of MR and NR. - Added pd field (panel dimension) to obj_t. - New interface to bli_packm_cntl_obj_create(). - Renamed bli_obj_packed_length()/_width() macros to bli_obj_padded_length()/_width(). - Removed local #defines for cache/register blocksizes in level-3 *_cntl.c. - Print out new cache and register blocksize extensions in test suite. - Also added new BLIS_EXTEND_[MNK]C_? macros for future use in using a larger blocksize for edge cases, which can improve performance at the margins. commit 59fca58dbe678d79c1df0916b022afbeac7c48fa Author: Field G. Van Zee Date: Fri Apr 19 15:26:29 2013 -0500 Fixed bug in compatibility layer (her2k/syr2k). Details: - Fixed a bug in the BLAS compatibility layer, specifically in bla_her2k.c and bla_syr2k.c, that caused incorrect computation to occur when the BLAS interface caller requests the [conjugate-]transpose case. Thanks to Bryan Marker for reporting the behavior that led to this bug. commit 09eacbd1ab1380a95a0e9625726b45e43ed102d6 Author: Field G. Van Zee Date: Thu Apr 18 19:39:13 2013 -0500 Changed old level3 test drivers to call front-ends. Details: - Changed old level-3 test drivers, in 'test' directory, to always call the front-end object API instead of the internal back-end with the locally defined control tree. commit 83e45de23e565138b8fde06fb11cfedc973b7246 Author: Field G. Van Zee Date: Thu Apr 18 18:33:03 2013 -0500 Allow packm_init() to reacquire a too-small mem_t. Details: - Changed bli_packm_init() to react differently to a situation where a pack obj_t has an already-allocated mem_t entry that has a buffer that is smaller than what will be needed to hold the block/panel that now needs to be packed. Previously, this situation was treated with an abort() since I assumed something was horribly wrong. I have changed the code so that it now reacts by releasing the previous mem_t and re-acquires a new mem_t with the new information. (This change was done at the request of Bryan Marker to facilitate code generation via DxT.) commit a6990434173b0cf651f8521194f3aef738deb7d2 Author: Field G. Van Zee Date: Thu Apr 18 13:52:47 2013 -0500 Fixed bug in packing block of A for hemm/symm. Details: - Fixed a bug in bli_packm_blk_var2() that affected the packing functionality of hemm and symm. The bug occurs whenever attempting to pack a Hermitian or symmetric matrix where the block of A being packed intersects the diagonal, but some of its micro-panels do not intersect the diagonal and lie completely in the unstored region. Thanks to Francisco Igual for reporting this bug. - Comment updates to both _blk_var2.c and _blk_var3.c. commit c92e7590e1934f830814ab614c794215ebe0c415 Author: Field G. Van Zee Date: Wed Apr 17 20:53:29 2013 -0500 Activated bli_packm_acquire_mpart_t2b(). Details: - Removed the overly-paranoid bli_abort() from the end of bli_packm_acquire_mpart_t2b(), to allow others to experiment with partitioning through packed blocks of A. Also, and more importantly, changed an earlier check that was causing an erroneous (but coincidentally redundant) abort(). Also, updated some of the comments in bli_packm_part.c. commit bea579e9f009a44e08008eb14d09f38748ab2b53 Author: Field G. Van Zee Date: Tue Apr 16 19:43:14 2013 -0500 Allow creation of "empty" objects. Details: - Modified bli_obj_alloc_buffer() to allow allocating an empty buffer, and modified bli_adjust_strides() to explicitly handle m = n = 0. - Updated bli_check_matrix_strides() to allow cases where m = n = 0. commit 7904e20f2e6908571ee5008da2a08084198eefae Author: Field G. Van Zee Date: Tue Apr 16 17:37:16 2013 -0500 Fixed "root" object bug in bli_her[2]k/syr[2]k. Details: - Fixed an obscure bug in the front-ends for herk, her2k, syrk, and syr2k, that manifested as the incorrect triangle being updated. It occurred when the user would pass in a matrix object that was correctly marked as symmetric/Hermitian and lower-stored, but whose root object was never marked as lower (or upper). We now alias and re-assign root status for matrix C within the front-ends. Note that trmm and trsm were already doing this, albeit for a slightly different reason (to allow the internal back-end to choose which algorithm to run--lower or upper--based on the uplo of the root object for both left and right side cases). Thanks to Bryan Marker for leading me to this bug. commit 19155a768dd97b57cfb59c32fa8e54a344ec66e1 Author: Field G. Van Zee Date: Tue Apr 16 11:24:03 2013 -0500 Fixed overzealous type-checking in bli_getsc(). Details: - Relaxed type checking in getsc so that the input object could be a constant and not just a proper floating-point type. (If it is a constant, default to extracting the dcomplex values.) Thanks to Bryan Marker for reporting this bug. - Added definition for bli_is_constant() in bli_param_macro_defs.h - Comment updates to various level-0 scalar routines. commit 2ee6bbca2953d04c967685da9735b3eaf8a4b813 Author: Field G. Van Zee Date: Mon Apr 15 19:27:57 2013 -0500 Fixed bug in bli_obj_is_packed() and renamed. Details: - This macro is used to determine whether the partitioning routines should call a corresponding packm_part routine instead. However, it was unintentionally catching matrices that were marked as "packed" by virtue of them simply being marked as BLIS_PACKED_UNSPEC in, say, bli_gemv(). The macro has now been renamed to bli_obj_is_panel_packed(), and now only checks for row or column panel packing. (Note that I first attempted to fix this bug in a571af816d72.) Thanks to Bryan Marker for reporting the erroneous behavior that led me to this bug. commit 99b99eebe70336b5f28039a4a084aa7f5fa7059d Author: Field G. Van Zee Date: Mon Apr 15 17:54:43 2013 -0500 Removed local reference ukernel blocksize macros. Details: - Removed locally defined gemm microkernel blocksize macros from _mxn reference microkernel definition and header. Meant to include this in a recent/previous commit (0020ef7c8271). commit 6a538fa7b164655f41cea5b9c8d3902438bda66b Author: Field G. Van Zee Date: Mon Apr 15 14:40:31 2013 -0500 Formatting change to mods in previous commit. commit ea079d35591e808971d2d98a1a7d9f89bc1f7c2f Author: Field G. Van Zee Date: Mon Apr 15 14:31:40 2013 -0500 Set structure of objects in level-2 BLIS APIs. Details: - Added missing statement to set structure field of local objects in top-level BLIS (BLAS-like) API wrappers. Thanks to Bryan Marker for reporting this bug. commit d9948c541c0446e20e249a1ccc83709ce51b7aa8 Author: Field G. Van Zee Date: Mon Apr 15 10:21:26 2013 -0500 Tweak to test suite function string construction. Details: - Fixed a minor bug in the way that the test suite would construct function name strings when the user anchored all parameters in input.operations. In this case, the test driver would mistake this situation for one where the operation simply had no parameters to begin with, and thus would not include the parameter string in the function string that is output for every result. commit ca9e435c57c5c7a000d2a32681dd8070ba850abd Author: Field G. Van Zee Date: Mon Apr 15 09:59:46 2013 -0500 Fixed a bug in reference implementation of dupl. Details: - Fixed a bug in reference implementation of dupl (bli_dupl_unb_var1.c), which resulted in incorrect duplication. - Updated old test drivers according to recently updated packm control tree creation interface. - Added 'restrict' to x86 gemm microkernel interface. commit 26cbd52e364bbe439e3744101cd5a6cbcb82dffd Author: Field G. Van Zee Date: Sun Apr 14 19:05:33 2013 -0500 Modified bli_kernel.h include order in blis.h. Details: - Delayed #include of bli_kernel.h in blis.h to prevent a situation where _kernel.h includes an optimized microkernel header, which uses BLIS types such as dim_t and inc_t, which would precede the definition of those types in bli_type_defs.h. - Moved the #include of bli_kernel_macro_defs.h in bli_macro_defs.h to blis.h (immediately after that of bli_kernel.h). commit 3414a23c38b0de45a8034b3dda2fc4b5a755e4e1 Author: Field G. Van Zee Date: Sat Apr 13 16:53:16 2013 -0500 CHANGELOG update. commit ec16c52f2ecf419c749175ce0a297441c10f1c68 Author: Field G. Van Zee Date: Sat Apr 13 16:41:16 2013 -0500 Updated INSTALL file (now redirects to website). commit 0020ef7c82711a7ebf08e5174f939bee2563184c Author: Field G. Van Zee Date: Sat Apr 13 15:26:35 2013 -0500 Removed gemmtrsm-, trsm-specific blocksize macros. Details: - Modified gemmtrsm micro-kernel wrappers to use new aliased blocksize macros instead of operation-specific ones. - Removed local, gemmtrsm-specific blocksize macro definitions found in micro-kernel header files. (Meant to include above changes in 31b100e7bf4a.) - Added comments to reference gemmtrsm micro-kernel wrapper implementation. commit 1a9f427b85bb95aaa9e54c8ff8ecad8734b361ee Author: Field G. Van Zee Date: Fri Apr 12 15:25:54 2013 -0500 Added/renamed alignment constants to _config.h. Details: - Added new memory alignment constants: BLIS_HEAP_STRIDE_ALIGN_SIZE (previously assumed to be same as SYSTEM_MEM) BLIS_CONTIG_ADDR_ALIGN_SIZE (previously assumed to be same as PAGE_SIZE) BLIS_STACK_BUF_ALIGN_SIZE (previously not enforced) and renamed existing ones BLIS_SYSTEM_MEM_ALIGN_SIZE -> BLIS_HEAP_ADDR_ALIGN_SIZE BLIS_CONTIG_MEM_ALIGN_SIZE -> BLIS_CONTIG_STRIDE_ALIGN_SIZE to better convey what the alignment factor is used for (and what it is not used for). - Removed BLIS_ENABLE_SYSTEM_MEM_ALIGN. Dynamic memory alignment is now disabled by setting BLIS_HEAP_STRIDE_ALIGN_SIZE to 1. - Inserted instances of __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))) into macro-kernels to specify stack alignment of temporary buffers. - Modified test suite driver to output new constants. - Removed bli_align_dim_to_sys() and bli_align_dim_to_cmem(). Instead, we now use bli_align_dim_to_size(), which takes a third argument (the desired alignment). commit a77d10e87e3c0ab55ec14d74c285bc95c06285c3 Author: Field G. Van Zee Date: Fri Apr 12 11:40:55 2013 -0500 Fixed an bug in axpyv/axpym when alpha is unit. Details: - Fixed bug whereby axpyv and axpym were incorrectly simplifying to a copy, rather than an add, when alpha = 1. Thanks to Bryan Marker for identifying this bug. commit 0495bd1d6de5995fe2fb79b321eec79e961eb7a5 Author: Field G. Van Zee Date: Thu Apr 11 16:39:25 2013 -0500 Moved _POSIX_C_SOURCE def to compiler cmd line. Details: - Removed the #define of _POSIX_C_SOURCE in bli_config.h (for both reference and clarksville configurations) and added "-D_POSIX_C_SOURCE=200112L" to the compiler command line arguments in make_defs.mk (for both configs). Thanks to Devin Matthews for suggesting this change. commit d43d1a0a2ef6de4bc57627566aef8e3fdb458b8c Author: Field G. Van Zee Date: Thu Apr 11 16:28:17 2013 -0500 Appended 'f2c_' to abs, min, max macros in f2c.h. Details: - Renamed abs, min, max, dmin, and dmax macros in bli_f2c.h so that they would not conflict with anything defined by the user (or the language). Thanks to Devin Matthews for suggesting this fix. - Updated all instances of the above macros accordingly. commit 31b100e7bf4aeaa4ceafefd2b6c3102d5fbc4cbb Author: Field G. Van Zee Date: Thu Apr 11 11:11:52 2013 -0500 Added new kernel blocksize macro aliases. Details: - Added new macros that alias level-3 cache and register blocksize macros to names that can be constructed via the PASTEMAC macro. These aliased macro definitions live inside bli_kernel_macro_defs.h, which is now #included after bli_kernel.h. - Modified macro-kernels to use new aliased blocksize macros instead of operation-specific ones. - Removed local, operation-specific kernel blocksize macro definitions (found in macro-kernel header files). commit bd2b24ba65b36d7c07c5918a3838ce2ff57c4b48 Author: Field G. Van Zee Date: Thu Apr 11 10:35:39 2013 -0500 Updated CREDITS file. commit 79328c15410215737f3f14cd069328cf52aa11fd Author: Field G. Van Zee Date: Thu Apr 11 10:32:14 2013 -0500 Reverted testsuite object files' home to 'obj'. Details: - Removed 'obj' and 'lib' from .gitignore. - Added testsuite/obj/.gitkeep (which is an empty file). - Updated testsuite/Makefile accordingly. - Thanks to Vernon Austel for pointing out the .gitkeep trick to tracking empty directories in git. commit 4afe3bfd82c03e1e97b58b7d250588a0d28541e5 Author: Field G. Van Zee Date: Tue Apr 9 17:45:39 2013 -0500 Renamed/moved object scalar constant macros. Details: - Replaced scalar constant macro definitions in bli_const_defs.h with a single, simplier macro in bli_obj_macro_defs.h. - Updated invocations of old macros accordingly. - Removed bli_const_defs.h. commit 357893f5be5c56ab7b062874005e77e614b23f06 Author: Field G. Van Zee Date: Tue Apr 9 14:48:15 2013 -0500 Applied fix from prev commit to gemmtrsm_?_ref_4x4 Details: - Fixed hard-coded kernels in bli_gemmtrsm_l_ref_4x4.c and bli_gemmtrsm_u_ref_4x4.c. commit 54988e8dca44475610bcaee5a7bc1c40e8921402 Author: Field G. Van Zee Date: Mon Apr 8 19:08:43 2013 -0500 Fixed a performance bug in trsm. Details: - Fixed a bug in the reference implementations of the gemmtrsm wrappers (bli_gemmtrsm_l_ref_mxn.c and bli_gemmtrsm_u_ref_mxn.c) whereby the reference gemm microkernel was hard-coded, and thus always called, even when GEMM_UKERNEL was defined to point to an optimzied microkernel. This manifested as artificially low trsm performance for all problem sizes, but especially for small problem sizes as it only affected blocks of A that intersected the diagonal. Thanks to Mike Kistler of IBM for helping me find this bug. commit a7252e40b5c351eef9a1df531ea0ef25cb5fb705 Author: Field G. Van Zee Date: Mon Apr 8 16:08:22 2013 -0500 Generate testsuite objects 'src'. Details: - Tweaked the testsuite makefile so that object files are stored in 'src' rather than 'obj', since (a) the top-level .gitignore dictates that obj directories are to be ignored, and (b) since git has problems tracking empty directories. Now, users do not need to create their own obj directories within their own local clones of BLIS. commit 803871c55b60d3c225ad9a0607fa507a9c16aab7 Author: Field G. Van Zee Date: Mon Apr 8 15:18:42 2013 -0500 Minor formatting changes. commit a571af816d72727e16cad37007e7043b9d6fa362 Author: Field G. Van Zee Date: Mon Apr 8 15:00:13 2013 -0500 Fixed definition of bli_is_packed_object() macro. Details: - Changed the definition of bli_is_packed_object() so that it keys off of the value of the pack schema bits in the info field of obj_t, rather than comparing the obj_t buffer with that of the mem_t entry. This was the cause of a very low probability bug whereby uninitialized memory caused the macro to evaluate to TRUE even though the object in question was not packed. Thanks to Vernon Austel of IBM for helping discover this bug. - Changed an abort() in bli_packm_part() to a not-yet-implemented. commit 3be14c32f735ecc6169d3ab6370cf8b69162acec Author: Field G. Van Zee Date: Sat Apr 6 12:54:45 2013 -0500 Updated information in testsuite output header. Details: - Added to the information that is echoed at the beginning of the test suite's output, and also re-labeled some existing information. commit 874707c1b183a4dd9a91dbfd4ea1522384c190df Author: Field G. Van Zee Date: Fri Apr 5 17:19:43 2013 -0500 Fixed edge case handling bug in herk macrokernels. Details: - Fixed a bug present in bli_herk_l_ker_var2() and bli_herk_u_ker_var2() that only manifests when BLIS is configured such that MR != NR. The bug involves incorrectly detecting edge cases, which resulted in some parts of matrix C potentially being skipped and not updated, depending on the problem size. - Updated the default values of MR and NR in config/reference/bli_kernel.h to 8 and 4, respectively, so that I can better stress the framework on a day-to-day basis. (The fact that they were both equal to 4 for so long is why I did not stumble upon this bug much sooner.) commit 7cbda15291d3e01300e71c286b9657b7ef0708bf Author: Field G. Van Zee Date: Thu Apr 4 15:25:43 2013 -0500 Added reference microkernels for arbitrary MR, NR. Details: - Added a new set of reference gemm, gemmtrsm, and trsm micro-kernels that contain explicit loops over MR and NR, thus allowing them to be used unmodified by developers who want to build a reference library with custom register blocksizes. - Changed config/reference/bli_kernel.h to use above ukernels by default. - Changed interfaces of new and existing gemm, gemmtrsm, and trsm micro-kernels to use 'restrict' keyword. - Added -funroll-loops option to config/reference/make_defs.mk. - Updated comments in bli_kernel.h describing constraints on register and cache blocksizes. - Updated _adds_mxn.h, _copys_mxn.h, and _xpbys_mxn.h macros files so that single-char macros are also defined. commit 6684b73d5501f91d24a79e26655a42819c9b3114 Author: Field G. Van Zee Date: Tue Apr 2 13:06:20 2013 -0500 Implemented amax operation and related changes. Details: - Implemented amax operation in BLIS. - Activated BLAS2BLIS routine mapping for new amax BLIS implementation. - Added integer support to [f]printv, [f]printm. - Added integer support to level-0 copys macros. - Updated printing of configuration information in test suite driver. - Comment changes to _config.h files. - Added comments to bla_dot.c to reminder reader what sdsdot()/dsdot() are used for. commit fb68087f8727cd5fd656a742a110e54fb1c91db9 Author: Field G. Van Zee Date: Tue Mar 26 15:10:16 2013 -0500 More memory alignment-related tweaks. Details: - Renamed BLIS_MEMORY_ALIGNMENT_SIZE to BLIS_CONTIG_MEM_ALIGN_SIZE. - Renamed BLIS_ENABLE_MEMORY_ALIGNMENT to BLIS_ENABLE_SYSTEM_MEM_ALIGN. - Added BLIS_SYSTEM_MEM_ALIGN_SIZE, which controls only the alignment passed into posix_memalign() or equivalent. - Defined new function, bli_align_dim_to_cmem(), which applies the contiguous memory alignment (rather than the system/malloc alignment). commit 9682ef61dbf9a8846c8b0826d4de24bc216cd641 Author: Field G. Van Zee Date: Tue Mar 26 14:14:53 2013 -0500 Always define memory alignment size cpp constant. Details: - Removed guard around #define for memory alignment size constant. Memory alignment should always be enabled, and so this value should always be defined. commit 3a787cccaae16531474f34398e3c0cf4f49b8cd8 Author: Field G. Van Zee Date: Tue Mar 26 13:59:19 2013 -0500 Renamed memory alignment macro constant. Details: - Renamed all occurrences of BLIS_MEMORY_ALIGNMENT_BOUNDARY to BLIS_MEMORY_ALIGNMENT_SIZE. commit 37308f9a502b56d94fa52a7df71c676a46c3be3d Author: Field G. Van Zee Date: Tue Mar 26 12:43:14 2013 -0500 Align packed panel strides with system alignment. Details: - Pass panel strides through bli_align_dim_to_sys() to ensure that each subsequent packed panel of A and B begins at an aligned address. (The first panel is presumably aligned to system alignment because it is aligned to a page boundary, which is typically much larger.) - Rearranged code in packm_init_pack() to prevent additional conditional blocks as a result of the aforementioned change. - Adjusted contiguous memory allocator so that the system memory alignment is used to allocate enough space for each block no matter what kind of register blocking is used (even if register blocksize is unit and every row/column needs maximal padding). - Adjusted default blocksizes in reference configuration so that MC*KC and KC*NC result in identical footprints for all datatypes. commit 40a0654ada5f256beb3da80ebba015a3c71fb61f Author: Field G. Van Zee Date: Sun Mar 24 20:18:12 2013 -0500 CHANGELOG update. commit b65cdc57d9e51fa00e3c03539cfb7e045707d0f4 Author: Field G. Van Zee Date: Sun Mar 24 20:01:49 2013 -0500 Migrated 'bl2' prefix to 'bli'. Details: - Changed all filename and function prefixes from 'bl2' to 'bli'. - Changed the "blis2.h" header filename to "blis.h" and changed all corresponding #include statements accordingly. - Fixed incorrect association for Fran in CREDITS file. commit 132bffcef7441f32d02cc7485aef6a0648e0ef1e Author: Field G. Van Zee Date: Sun Mar 24 18:49:36 2013 -0500 Removed several 'old' directories and files. Details: - Removed most of the 'old' directories scattered throughout the framework, which includes alternate/half-baked/broken implementations. commit 551ea4767a3ea6c263f12aaca94bc2642cee4cfa Author: Field G. Van Zee Date: Sun Mar 24 18:00:10 2013 -0500 Removed #include "blis2.h" from low-level headers. Details: - Removed #include of "blis2.h" from various lower-level, operation-specific header files throughout the framework. Given that these low-level headers are included within #blis2.h in a very specific order, #include'ing blis2.h within them directly is unnecessary. commit bc7b318ed0960edeb4537797dd8c91de0d942ca9 Author: Field G. Van Zee Date: Fri Mar 22 17:18:58 2013 -0500 Added cpp guards to conflicting libflame typedefs. Details: - Added cpp guards around the definitions of dim_t, scomplex, and dcomplex. This is a temporary hack to allow interoperability with libflame. (Similarly temporary changes are being made to libflame's type definitions file.) commit f469907503fcdc24dff0174c569170e6e756e045 Author: Field G. Van Zee Date: Fri Mar 22 15:20:15 2013 -0500 Renamed MAX_PREFETCH_BYTE_OFFSET to MAX_PRELOAD_. Details: - Renamed BLIS_MAX_PREFETCH_BYTE_OFFSET to BLIS_MAX_PRELOAD_BYTE_OFFSET since "prefetch" is kind of a loaded word (e.g. "prefetch" instructions, which are different than the particular kind of prefetching/preloading referred to by this constant). commit d1023bfbc6668a58a01ee4f82ded2319911e7b19 Author: Field G. Van Zee Date: Fri Mar 22 15:09:59 2013 -0500 Removed build/old directory. commit 718888849c48d99f83eea6b8f83bc1998cffef7e Author: Field G. Van Zee Date: Fri Mar 22 15:07:01 2013 -0500 Deprecated 'flame' configuration. Details: - Removed 'flame' configuration, as it was horribly out-of-date. - Comment changes to bl2_blocksize.c and bl2_mem.c. commit bba38cf4e9d28058c14483f44fa074a6d2852ad9 Author: Field G. Van Zee Date: Tue Mar 19 18:07:40 2013 -0500 Added missing conjbeta argument to scald. commit 1f82b51d06d0279dded3f2b87ba59403f3ed0af6 Author: Field G. Van Zee Date: Mon Mar 18 15:37:20 2013 -0500 Relocated packed mem_t dimension fields to obj_t. Details: - Removed the m and n (and elem_size) fields from the mem_t object, and added m_packed and n_packed fields to obj_t. These new fields track the same as the old ones. From an abstraction standpoint, it seemed awkward to store those dimensions inside the mem_t. - Updated interfaces to bl2_mem_acquire_*() so that only a byte size argument is passed in, instead of m, n, and elem_size. - Updated bl2_packm_init_pack() and bl2_packv_init_pack() to inline the functionality of bl2_mem_alloc_update_m() and bl2_mem_alloc_update_v(), respectively. - Updated packm variants to access the packed length and width fields from their new locations. commit 36c782857bf9b8ac1b1dac47a70f689a4407e2cc Author: Field G. Van Zee Date: Mon Mar 18 10:37:03 2013 -0500 CHANGELOG update. commit e7d41229d3b1674e74f47d7f29fae004a745201a Author: Field G. Van Zee Date: Fri Mar 15 17:12:36 2013 -0500 Re-implemented contiguous memory allocator. Details: - Completely re-wrote the contiguous memory allocator (bl2_mem.c). The new allocator instantiates and initializes three separate memory pool objects, each one associated with a separate array of contiguous memory blocks, each block of fixed and uniform size. (The three pools are for allocating mc-by-kc blocks of A, kc-by-nc panels of B, and mc-by-nc panels of C.) The pool objects use a stack structure internally to track which blocks in the region have been "checked out" to a thread and which are still available. Critical regions are now clearly marked and adaptable to parallel environments (e.g. OpenMP). Memory pools are set up when bl2_init() is called. - Added a new field to the packm control tree node, which indicates what kind of packed buffer is being allocated. The enumerated type for this argument is defined as packbuf_t in bl2_type_defs.h. - Updated level-3 _cntl.c files to pass in the appropriate value for a new packbuf_t argument to bl2_packm_cntl_obj_create(). - Moved some macros called by packm_init_pack() from bl2_obj_macro_defs.h to bl2_mem_macro_defs.h. - Added BLIS_MAX_NUM_THREADS to bl2_config.h, which we use as the default number of blocks of A reserved for the memory allocator. - Deprecated bl2_align_dim(). Replaced usage with that of bl2_align_dim_to_mult(). Turns out that typically we don't need to align a dimension to the system alignment, since that value has to do with starting addresses, whereas the values we are dealing with are unitless dimensions. commit 1e76cae00cb0a04544aaae1ade878686b238d283 Author: Field G. Van Zee Date: Fri Mar 15 12:21:42 2013 -0500 Perform her2k var1 loops in sequence. Details: - Changed variant 1 of her2k so that the two rank-k products are computed and accumulated in sequence rather than fused into one loop. This is necessary if BLIS is to be configured to provide only enough contiguous memory for one panel of B. commit c95c270eba91ae4efc26603beddfd0292caa919b Author: Field G. Van Zee Date: Thu Mar 7 14:42:15 2013 -0600 Enhanced tracking of dimensions for mem_t objects. Details: - Added new fields to mem_t struct definition to track the allocated (as opposed to the currently used) dimensions of the memory region. This allows packm_init() to be more robust in situations where memory is already allocated but is more than needed for the current packing job. - Updated logic in bl2_obj_set_buffer_with_cached_packm_mem() macro, used in packm_init(), to update the "currently used" dimensions of the mem_t object if the requested dimensions are smaller than the allocated dimensions. commit e99281a0f41d482fddeffa239bfc8e13e6d13d4b Author: Field G. Van Zee Date: Thu Mar 7 14:00:10 2013 -0600 Fixed test suite flop formulas for ops with side. Details: - Fixed incorrect flop counts in test suite modules for hemm, symm, trmm, trmm3, and trsm. - Comment updates in herk macro-kernels. commit ef8cbfc44dd620fdcbdb51cdb173217194bebe31 Author: Field G. Van Zee Date: Sat Mar 2 12:47:06 2013 -0600 Added "version" to .gitignore. Details: - Added "version" to .gitignore file so that the file does not show up when running 'git status', or accidentally get pulled into the index when running 'git add' or 'git add --all'. commit e9e0747c2f6c178f53ac46ab794acbb7b8c4fea8 Author: Field G. Van Zee Date: Sat Mar 2 12:43:54 2013 -0600 Removed version file from version control. Details: - Removed version file from version control to prevent git errors that occur when trying to pull new commits. commit bb612f864e9c17dd9805e9446840f02259619469 Author: Field G. Van Zee Date: Fri Mar 1 12:55:42 2013 -0600 Updated behavior of bl2_obj_induce_trans() macro. Details: - Changed bl2_obj_induce_trans() so that the transposition bit is no longer updated as part of the macro. All current uses of the macro have been coupled with instances of bl2_obj_set_trans() to clear the bit. - Added Jed to CREDITS file. commit f24e29b789e7314764a818ceb3063126936c986f Author: Field G. Van Zee Date: Fri Feb 22 18:15:41 2013 -0600 Replaced banded/packed BLAS2 stubs with f2c code. Details: - Retired the blas2blis wrappers that simply called abort with a "not yet implemented" message. This includes all of the level-2 banded and packed routines. - Replaced the aforementioned with the corresponding netlib implementations having been run through f2c (with some customization). - Added directories named 'attic' to build/gen-make-frags/ignore_list. commit 1454c1a14207766dfed372b8e38b47fa384f5198 Author: Field G. Van Zee Date: Fri Feb 22 12:38:45 2013 -0600 Moved Fortran name-mangling macro to bl2_config.h. Details: - Moved the Fortran-77 name-mangling macros from bl2_blas_macro_defs.h to the configuration directory (bl2_config.h, specifically) given that it can be expected to be tweaked by some developers. commit ede75693e5a36c6006087c4a7df834175b604504 Author: Field G. Van Zee Date: Fri Feb 22 12:11:24 2013 -0600 Implemented blas2blis compatibility layer. Details: - Added the blas2blis compatibility layer, located in frame/compat. This includes virtually all of the BLAS, including banded and packed level-2 operations. - Defined bl2_init_safe(), bl2_finalize_safe(). The former allows a conditional initialization, which stores the "exit status" in an err_t, which is then read by the latter function to determine whether finalization should actually take place. - Added calls to bl2_init_safe(), bl2_finalize_safe() to all level-2 and level-3 BLAS-like wrappers. - Added configuration option to instruct BLIS to remain initialized whenever it automatically initializes itself (via bl2_init_safe()), until/unless the application code explicitly calls bl2_finalize(). - Added INSERT_GENTFUNC* and INSERT_GENTPROT* macros to facilitate type templatization of blas2blis wrappers. - Defined level-0 scalar macro bl2_??swaps(). - Defined level-1v operation bl2_swapv(). - Defined some "Fortran" types to bl2_type_defs.h for use with BLAS wrappers. commit 995edf43e21c1868732dbdd7fee14b08730218bd Author: Field G. Van Zee Date: Thu Feb 21 14:30:50 2013 -0600 Updated version file. (Forgot to in prev commit). commit e823b08aaf7b65ecc6ddc30570709ea8a4b52aa7 Author: Field G. Van Zee Date: Thu Feb 21 12:00:17 2013 -0600 Fixed some scalar types in BLAS-like Herm APIs. Details: - Some of the scalars of Hermitian operations, such as alpha in her, alpha and beta in herk, and beta in her2k, need to be real. These arguments were typed incorrectly as the complex types. This has been fixed. Note the issue was only present in the BLAS-like APIs for these operations (not the native object-based interfaces). commit 5ece050a669e74ba4a711d1d4669239d22d45642 Author: Field G. Van Zee Date: Wed Feb 20 15:50:54 2013 -0600 Updated version file. (Forgot to in prev commit). commit f243034b8b430d4684680ea8eddfd246e73fefc0 Author: Field G. Van Zee Date: Wed Feb 20 14:11:36 2013 -0600 Changed API of packm_init_pack() to use blksz_t. Details: - Changed the interface of packm_init_pack() so that mult_m and mult_n are passed in as type blksz_t* instead of dim_t. - Make similar change for packv_init_pack(). commit da0c22f24107be9f33e0ea2dae52e5534b1fd0e5 Author: Field G. Van Zee Date: Fri Feb 15 09:59:48 2013 -0600 Minor changes to lower levels of scalm and setm. Details: - Removed diagx parameter from lower-level interfaces of scalm. - Modified scalm_basic_check() to expect an object with a nonunit diagonal. - Changed setm_unb_var1() so that having an implicit unit diagonal results in only the strictly lower or upper triangle of the matrix being modified. commit 2c836adadcd2a7d7f217033ac4d7fcad03d5bd55 Author: Field G. Van Zee Date: Thu Feb 14 10:42:56 2013 -0600 Updated beta == zero semantics of mulsc. Details: - Updated beta == zero semantics of mulsc. Hopefully this is the last operation that needed updating. - Added Devin to CREDITS file. commit 722b66c7dcaaaa1b109e7c8b1d53fd71a9af8240 Author: Field G. Van Zee Date: Thu Feb 14 10:18:00 2013 -0600 Removed some calls to setv() in test modules. Details: - Removed calls to setv() in test modules whose sole purpose was to initialize vectors to zero to ensure that nan's and inf's would not taint the computation. Now that beta == zero semantics have been updated to clear the output operand (when beta is zero), rather than multiply against it, these setv() calls are no longer needed. commit e6ac623a902f776c42f85eadbf76996d9770a0db Author: Field G. Van Zee Date: Wed Feb 13 18:44:59 2013 -0600 Properly implemented beta == 0 semantics. Details: - Changed name of set0 and set0_mxn macros to set0s and set0s_mxn, respectively. - Added code to the following operations that sets the output operand to zero if the corresponding scalar is zero (rather than performing the floating-point multiply, or in the case of setv, copying the value). This will prevent nan's and inf's from creeping into results from uninitialized memory. - axpy - dotxv - scalv - scal2v - setv - gemv - ger - hemv - her - her2 - gemm reference ukernels commit aedccbc85d491e41711a0c6eb0d246d8700a199a Author: Field G. Van Zee Date: Wed Feb 13 18:29:53 2013 -0600 Fixed stale interface to packm_unb_var1(). Details: - Removed the control tree from the interface to packm_unb_var1(), which I meant to do when it was un-deprecated. commit c23135669f7a8a545e2e11ef559bf284be8bc65c Author: Field G. Van Zee Date: Wed Feb 13 13:21:00 2013 -0600 Un-deprecated packm_unb_var1.c (needed by l2 ops). Details: - Added bl2_packm_unb_var1() back into the mix once I realized that level-2 operations still need this routine for packing matrices. Now, whether level-2 operations should be packing matrices to begin with is another matter. But this fixes the segmentation fault one would have gotten when running bl2_gemv() on a general stride matrix. commit cf49e35f9819f9d93ebdca4703ade5abab28f6f6 Author: Field G. Van Zee Date: Tue Feb 12 18:39:35 2013 -0600 Removed cntl tree usage from packm implementation. Details: - Added new fields to obj_t info field: - invert_diag - pack_order_if_upper - pack_order_if_lower These fields allow packm_init() to embed information that begins in the control tree into the object so that the packm implementation does not need to use control trees at all. This is being done to aid Bryan's DxT code generation. - Added macros that operate on above fields. - Changed packm_init(), packm_blk_var2(), and packm_blk_var3() according to above changes. - Made similar (but much simpler) changes to packv. - Deprecated packm_blk_var1(), packm_unb_var1(), and packm_densify(). These were part of prototype implementations and are no longer needed. commit eb139ae256651af7820b93ef982626180195b87f Author: Field G. Van Zee Date: Tue Feb 12 12:39:30 2013 -0600 Replaced bl2_abs() with _fabs() where appropriate. commit 474bac30c99928f9e87315972bcb45c632c0b7ec Author: Field G. Van Zee Date: Tue Feb 12 12:23:48 2013 -0600 Removed level-0 macros projrs, grabis. Details: - Replaced instances of projrs and grabis macros with newer, more general-purpose getris. commit 03a260a457c8964e4603a655cee0d40ac17affba Author: Field G. Van Zee Date: Tue Feb 12 11:45:34 2013 -0600 Restored executable permissions to scripts. Details: - Restored executable (0755) permissions to scripts that were touched by the recursive sed script that updated the copyright headers in the previous commit. commit 1274e1243775e5e705114257a43176f63635227f Author: Field G. Van Zee Date: Mon Feb 11 14:37:47 2013 -0600 Updated copyright headers from 2012 to 2013. commit 3b620cc8e90c53c79129bd9dd89ae6b77c2446f1 Author: Field G. Van Zee Date: Mon Feb 11 13:38:07 2013 -0600 CHANGELOG update. commit 768fcebaa8be0eb936a6e7a02cd8a19438c79d99 Author: Field G. Van Zee Date: Mon Feb 11 13:20:44 2013 -0600 Added unified test suite, and many fixes. Details: - Added a highly configurable, unified test suite. - Removed DUPB configuration constant from bl2_kernel.h and macro-kernel header files. Now, instead, DUPB is computed as (NDUP != 1) within each macro-kernel. This fixes a bug in trmm/trsm whereby bp was indexed into incorrectly when DUPB was set to FALSE but the NDUP was still non-unit. By encoding both pieces of information into one constant in _kernel.h, it seems somewhat less likely others will encounter this bug in the future. - Added level-2 cache blocksizes to _kernel.h for reference configuration, and defined blocksizes in _cntl.c files to these default values. - Changed semantics of her2k and syr2k such that these operations no longer expect the B matrix to already be conjugate-transposed (or just transposed for syr2k). However, these semantics are preserved for the internal mechanics of the implementations, including the internal back-end and all blocked variants. - Inserted checks for real-valued alpha and beta for herk/her2k and herk, respectively. - Relaxed general object structure constraints in _basic_check() for gemv, ger. - Changed her front-end to NOT copy-cast to real projection; instead, this is replaced by selecting either the real part or both parts within the unblocked algorithm implementation, depending on the value of conjh. - Added conjh to all _check routines for her so that the code knows when to verify that alpha has an imaginary component equal to zero (for her, but not syr). - Changed control tree for her to forgo packing. - Added unit diagonal support to fnormm. - Redefined real versions of abval2s macros in terms of fabs(), fabsf(). - Redefined complex versions of sqrt2s macros using the actual "complex square root" formula. - Created new level-0 object-based routines, suffixed with "sc" (for "scalar"). - Defined new level-1v, -1d, and -1m versions of add and sub operations (two-operand add and subtract). - Added new scalar macros: - getris: acquire real and imaginary components. - setris: set real and imaginary components. - addjs: addition with conjugated x. - subjs: subtraction with conjugated x. - Defined new utility operations: - absumv: element-wise sum of absolute values for vector elements. - absumm: element-wise sum of absolute values for matrix elements. - mkherm: convert existing matrix to Hermitian. - mksymm: convert existing matrix to symmetric. - mktrim: convert existing matrix to triangular. - Added various error checking routines. - Added bl2_clock_min_diff(), which is used to more cleanly measure the wall clock time of a code block. - Added general stride support to bl2_obj_alloc_buffer(). - Added bl2_obj_init_scalar(). - Updated parameter mapping in bl2_param_map.c. - Added support for queriable version string. - Fixed a bug in the her2k macro-kernels (which currently are simply implemented in terms of two invocations of herk) whereby beta was being applied to both the first and second rank-k updates, rather than only the first. - Fixed a bug in trmm/trsm whereby transpose and right side cases were not properly implemented due to erroneous assumptions regarding aliasing and root objects. - Fixed a bug in the upper triangular trsm macro-kernel in which the wrong MR x NR block of B was being updated. - Fixed a bug in the inverts macro in the double real case whereby the value was typecast to float before inversion. This affected non-unit cases of dtrsm. - Fixed a bug in the reference kernels for gemmtrsm whereby the minus one constant was being applied incorrectly. - Fixed a bug in the overall treatment of non-unit alpha for trsm. The code now mimics the rank-k strategy of gemm, whereby alpah is applied during the first iteration of variant 3, with BLIS_ONE passed in instead for subsequent iterations. This also required passing alpha into the macro- kernels as well as the fused gemmtrsm micro-kernels. - Fixed a bug in trsm_u_blk_var1 whereby the gemm macro-kernel was being called for blocks strictly above the diagonal. While this sounds good in theory, this cannot be done because gemm_ker_var2 expects row panels of A to be packed from top to bottom, while for trsm_u, A is actually packed from bottom to top due to the reverse (BR->TL) nature of the algorithm. - Fixed a bug in packm_cxk() whereby panel packings with unit panel dimensions were mishandled due to incorrect arguments to the copyv kernel. Also changed the copyv kernel invocation to scal2v so that these edge cases are properly handled when scaling is requested. - Fixed a bug in packv_int() whereby an uninitialized object is passed in instead of the source object. - Fixed a bug whereby level-2 code could allocate memory dynamically via bl2_malloc() and then attempt to free it via bl2_mm_release(). Also fixed a potential future bug whereby a mem_t object that is actually no longer "allocated" from the static pool is mistaken for being allocated due to failure to NULLify the buffer when the block was most recently released. - Fixed a bug in bl2_acquire_mpart_*() whreby the uplo field was mistakenly toggled when the requested subpartition needed to be "reflected" due to it residing in an unstored region. commit be94fb84c0351602d7585269f29998e3bf83f899 Author: Field G. Van Zee Date: Fri Jan 4 10:55:21 2013 -0600 Added missing 'd' to fused gemmtrsm function name. commit 879a179e1dee36f0c56765f2ab91a26861019b34 Author: Field G. Van Zee Date: Fri Jan 4 10:37:27 2013 -0600 Added debug statements to bl2_mm_acquire_m(). Details: - Added printf() statements to bl2_mm_acquire_m() to help debug issues with prematurely exhausted memory pool. - Removed 'd' from kernel names of reference kernels in clarksville configuration's bl2_kernel.h commit 806e74beb4eafeef620a555ffbb3f6779e29c7b6 Author: Field G. Van Zee Date: Thu Dec 20 17:07:50 2012 -0600 Defined Frobenius norm operations. Details: - Added level-0 grabis macro operation to grab imaginary component of one variable and copy it to the real component of another variable. - Defined sumsqv operation, which computes the sum of the absolute squares of the elements of a vector. This implementation is modeled after ?lassq in netlib LAPACK. - Defined fnormv and fnormm operations, which compute the Frobenius norm on vectors and matrices, respectively. These operations are treated as one- operand operations where the output norm value is the real projection of the datatype of the input operand. Both operations are implemented in terms of sumsqv. commit 66e80ce1aec099b2b2b0c4f295e38add2c921383 Author: Field G. Van Zee Date: Thu Dec 20 17:02:55 2012 -0600 Added GENT*R macros; tweaked bl2_machval defs. Details: - Added function and prototype macro-generating macros for GENTFUNCR and GENTPROTR, which are one-operand macros with auxiliary real projection types. - Tweaked bl2_machval files to use new macros. commit 2fecc88ca22142020573f168da715e8e9f3dd7de Author: Field G. Van Zee Date: Thu Dec 20 11:35:14 2012 -0600 Fixed harmless macro bug in level-1m operations. Details: - Fixed some inconsistent usage of n_iter_max and n_iter in the two bl2_set_dims_incs_uplo_[12]m macros. The right thing ended up happening despite the bug, which is why I had not discovered it until now. commit 8945db6ec9f82168cf72411ad408b4fdb44ae0d1 Author: Field G. Van Zee Date: Tue Dec 18 15:07:36 2012 -0600 Renamed x86,x86_64 kernels to indicate 'd' fusing. Details: - Renamed x86 and x86_64 kernels to contain a 'd' before the fusing shape to emphasize that the fusing shape is not for all datatype instances, but rather just for one (that of double-precision real). Other fusing shapes would be proportional to their precision and domain "byte footprints". - Corresponding changes to config/clarksville/bl2_kernel.h. commit 6fbbdd4e194d06096ad08c5db61127be338067db Author: Field G. Van Zee Date: Tue Dec 18 14:34:02 2012 -0600 More tweaks to _config.h, _kernel.h; smem tweaks. Details: - Moved kernel-related definitions form bl2_config.h to bl2_kernel.h. - Replaced #define of _GNU_SOURCE with #define of _POSIX_C_SOURCE. This accomplishes the same thing (enabling posix_memalign()) without enabling all of the GNU extensions we don't need. - Defined the size of the static memory pool in terms of MC, KC, and NC, as well as two new constants that determine how many MCxKC blocks and how many KCxNC blocks should be allocated (defined in bl2_config.h). - In the case of static memory pool exhaustion, replaced the generic bl2_abort() with a specific error code call. commit 5d8bdb21c48e8fb11bef6128a242122cc1470a99 Author: Field G. Van Zee Date: Mon Dec 17 16:07:36 2012 -0600 Minor reordering of bl2_config.h definitions. commit 4a83f67490136a898f558e273b76a687aed8b893 Author: Field G. Van Zee Date: Mon Dec 17 12:35:54 2012 -0600 Consolidated configuration headers. Details: - Merged contents of bl2_arch.h into bl2_config.h for reference and clarksville configurations. - Updated CREDITS, INSTALL, LICENSE, README files. commit 0670c33cc14612f636ef09ede4133404ae0af6ba Author: Field G. Van Zee Date: Fri Dec 14 12:45:26 2012 -0600 Fixed bug in reference gemm ukernels. Details: - Fixed a bug whereby, for the reference gemm ukernels, the matrix product was not correctly accumulated and scaled (by alpha) into the output matrix C. (Thanks to Fran for finding this bug.) - Whitespace changes to reference trsm kernels. commit e2e7cb2fbe615be4d375bc2dce88d03d98fadc9e Author: Field G. Van Zee Date: Thu Dec 13 18:17:54 2012 -0600 Expanded reference packm/unpackm kernel set to 16. Details: - Added 10xk, 12xk, 14xk, and 16xk reference kernels for packm and unpackm. - Updated bl2_[un]packm_cxk() to silently use scal2m if "out of range" kernel size is requested. (Thanks to Tyler for finding this bug.) - Updated bl2_kernel.h to contain new _KERNEL definitions, according to above changes, for 'reference' and 'clarksville' configurations. - Updated CHANGELOG. - Removed "output*.m" from .gitignore. commit 17455a8bce038dd570356ab0c5c11d9a89f20248 Author: Field G. Van Zee Date: Mon Dec 10 17:23:32 2012 -0600 Minor updates towards to 0.0.1. commit 7ad4ebef38b8e6eea9b6091844ba7294ec870271 Author: Field G. Van Zee Date: Mon Dec 10 16:18:40 2012 -0600 Tweaks to get BLIS compiling again on clarksville. Details: - Updated header files and make_defs.mk in config/clarksville. - Fixes to bl2_mem.c (now that SMEM_M, SMEM_N are gone). - Moved definition of blksz_t from bl2_cntl.h to bl2_type_defs.h. - Shuffled include statements in blis2.h. commit cc58ea86010b1f046134d13b546c878389df9af5 Author: Field G. Van Zee Date: Mon Dec 10 14:55:12 2012 -0600 Added template fragment.mk; updated .gitignore. commit 714c527b0eb153b7e2040b79349edc8372f743fd Author: Field G. Van Zee Date: Fri Dec 7 19:54:04 2012 -0600 Added 'changelog' make target; other tweaks. Details: - Updated CHANGELOG. - Added 'changelog' target to Makefile that runs 'git log --decorate' and overwrites CHANGELOG with the output. - Other trivial changes. commit e4e5404d26aded4873278e85faf6f14ac32115b5 Author: Field G. Van Zee Date: Fri Dec 7 17:34:53 2012 -0600 Define static memory pool size in bl2_config.h. commit 19bb507d0de6a2bd3ce37cf616bdcd6b419ed641 Author: Field G. Van Zee Date: Fri Dec 7 17:18:00 2012 -0600 Refined INSTALL text; added 'showconfig' target. Details: - Added 'showconfig' target to Makefile. - Added header files and ./config//make_defs.mk as prerequisites to object file rules. - Added config.mk as prerequisite to library install rules. - Edited and added to INSTALL file. commit 26cb659dd79636489db5a051aa60fff80273a7b9 Author: Field G. Van Zee Date: Thu Dec 6 15:34:53 2012 -0600 Added auto-detection of version string (via git). Details: - Added build/update-version-file.sh script for auto-detecting "version" string and updating 'version' file accordingly. (If .git directory is not present, then it is assumed this copy of BLIS is a downloaded release, in which case 'version' file is left unchanged.) - Added invocation of update-version-file.sh to configure script. commit b0ecd0ff52fa6ffc9e1d9eb44c365f7f009a6204 Author: Field G. Van Zee Date: Thu Dec 6 14:27:11 2012 -0600 Wrote first draft of INSTALL file. commit bcbe81235a35ccfdbcc2f2319a0ca6e04f75a785 Author: Field G. Van Zee Date: Thu Dec 6 12:42:35 2012 -0600 Updated standalone test Makefile and other fixes. Details: - Major edits to test/Makefile to bring up-to-date wrt new build system; should no longer be broken. - Minor edits to top-level Makefile. - Fixed copy-and-paste bugs in - frame/1m/packm/ukernels/bl2_packm_ref_?xk.c - frame/1m/unpackm/ukernels/bl2_unpackm_ref_?xk.c commit 2f272b40f43307909736327f49d17737c7a05d37 Author: Field G. Van Zee Date: Tue Dec 4 19:22:14 2012 -0600 Added build system and continued reorganization. Details: - Added/renamed packm, unpackm kernels. - Added machine value routines. - Added param_map facility. - Renamed AUTHORS to CREDITS. - Added Makefile; continued to expand upon existing configure script. - #define fuse_fac macros in operation headers if not defined already (by the user in bl2_kernels.h). commit 00f3498a8943be1b387f0d5c029c8c7891687ad5 Author: Field G. Van Zee Date: Mon Dec 3 12:36:11 2012 -0600 Initial commit. blis-1.1/CONTRIBUTING.md000066400000000000000000000114051474157777200145630ustar00rootroot00000000000000## How to contribute to BLIS First, we want to thank you for your interest in contributing to BLIS! Please read through the following guidelines to help you better understand how to best contribute your potential bug report, bugfix, feature, etc. #### **Did you find a bug?** * **Check if the bug has already been reported** by searching on GitHub under [Issues](https://github.com/flame/blis/issues). * If you can't find an open issue addressing the problem, please feel free to [open a new one](https://github.com/flame/blis/issues/new). Some things to keep in mind as you create your issue: * Be sure to include a **meaningful title**. Aim for a title that is neither overly general nor overly specific. * Putting some time into writing a **clear description** will help us understand your bug and how you found it. * You are welcome to include the BLIS version number (e.g. 0.3.2-15) if you wish, but please supplement it with the **actual git commit number** corresponding to the code that exhibits your reported behavior (the first seven or eight hex digits is fine). * Unless you are confident that it's not relevant, it's usually recommended that you **tell us how you configured BLIS** and **about your environment in general**. Your hardware microarchitecture, OS, compiler (including version), `configure` options, configuration target are all good examples of things to you may wish to include. If the bug involves elements of the build system such as bash or python functionality, please include those versions numbers, too. * If your bug involves behavior observed after linking to BLIS and running an application, please provide a minimally illustrative **code sample** that developers can run to (hopefully) reproduce the error or other concerning behavior. #### **Did you write a patch that fixes a bug?** If so, great, and thanks for your efforts! Please submit a new GitHub [pull request](https://github.com/flame/blis/pulls) with the patch. * Ensure the PR description clearly describes the problem and solution. Include any relevant issue numbers, if applicable. * Please limit your PR to addressing one issue at a time. For example, if you are fixing a bug and in the process you find a second, unrelated bug, please open a separate PR for the second bug (or, if the bugfix to the second bug is not obvious, you can simply open an [issue](https://github.com/flame/blis/issues/new) for the second bug). * Before submitting new code, please read the [coding conventions](https://github.com/flame/blis/wiki/CodingConventions) guide to learn more about our preferred coding conventions. (It's unlikely that we will turn away your contributed code due to mismatched coding styles, but it will be **highly** appreciated by project maintainers since it will save them the time of digressing from their work--whether now or later--to reformat your code.) #### **Did you fix whitespace or reformat code?** Unlike some other projects, if you find code that does not abide by the project's [coding conventions](https://github.com/flame/blis/wiki/CodingConventions) and you would like to bring that code up to our standards, we will be happy to accept your contribution. Please note in the commit log the fixing of whitespace, formatting, etc. as applicable. If you are making a more substantial contribution and in the vicinity of the affected code (i.e., within the same file) you stumble upon other code that works but could use some trivial changes or reformatting, you may combine the latter into the commit for the former. Just note in your commit log that you also fixed whitespace or applied reformatting. #### **Do you intend to add a new feature or change an existing one?** That's fine, we are interested to hear your ideas! * You may wish to introduce your idea by opening an [issue](https://github.com/flame/blis/issues/new) to describe your new feature, or how an existing feature is not sufficiently general-purpose. This allows you the chance to open a dialogue with other developers, who may provide you with useful feedback. * Before submitting new code, please read the [coding conventions](https://github.com/flame/blis/wiki/CodingConventions) guide to learn more about our preferred coding conventions. (See comments above regarding mismatched coding styles.) #### **Do you have questions about the source code?** * Feel free to ask questions on the [blis-devel mailing list](https://groups.google.com/d/forum/blis-devel). You'll have to join to post, but don't be shy! Most of the interesting discussion (outside of GitHub) happens on blis-devel. We also have a [blis-discuss mailing list](https://groups.google.com/d/forum/blis-discuss), but it is not the preferred venue for discussion these days. Here at the BLIS project, we :heart: our community. :) Thanks for helping to make BLIS better! Field blis-1.1/CREDITS000066400000000000000000000204601474157777200133530ustar00rootroot00000000000000 BLIS framework Acknowledgements --- The BLIS framework was originally authored by Field Van Zee @fgvanzee (The University of Texas at Austin) but many others have contributed code, ideas, and feedback, including Sameer Agarwal @sandwichmaker (Google) Murtaza Ali (Texas Instruments) Sajid Ali @s-sajid-ali (Northwestern University) Erling Andersen @erling-d-andersen Alex Arslan @ararslan Vernon Austel (IBM, T.J. Watson Research Center) Mohsen Aznaveh @Aznaveh (Texas A&M University) Abhishek Bagusetty @abagusetty (Argonne National Laboratory) Satish Balay @balay (Argonne National Laboratory) Kihiro Bando @bandokihiro Matthew Brett @matthew-brett (University of Birmingham) Jérémie du Boisberranger @jeremiedbb Jed Brown @jedbrown (Argonne National Laboratory) Alex Chiang @alexsifivetw (SiFive) Robin Christ @robinchrist Dilyn Corner @dilyn-corner Mat Cross @matcross (NAG) Harsh Dave @HarshDave12 (AMD) Tim Davis @DrTimothyAldenDavis (Texas A&M University) @decandia50 Daniël de Kok @danieldk (Explosion) Kay Dewhurst @jkd2016 (Max Planck Institute, Halle, Germany) Jeff Diamond (Oracle) Johannes Dieterich @iotamudelta Krzysztof Drewniak @krzysz00 Marat Dukhan @Maratyszcza (Google) Victor Eijkhout @VictorEijkhout (Texas Advanced Computing Center) Evgeny Epifanovsky @epifanovsky (Q-Chem) Isuru Fernando @isuruf James Foster @jd-foster (CSIRO) Roman Gareev @gareevroman Richard Goldschmidt @SuperFluffy Chris Goodyer Alexander Grund @Flamefire John Gunnels @jagunnels (IBM, T.J. Watson Research Center) Ali Emre Gülcü @Lephar @h-vetinari Jeff Hammond @jeffhammond (Intel) Jacob Gorm Hansen @jacobgorm Shivaprashanth H (Global Edge) Jean-Michel Hautbois @jhautbois Ian Henriksen @insertinterestingnamehere (The University of Texas at Austin) Greg Henry (Intel) Minh Quan Ho @hominhquan Matthew Honnibal @honnibal Stefan Husmann @stefanhusmann Aaron Hutchinson @Aaron-Hutchinson (SiFive) Francisco Igual @figual (Universidad Complutense de Madrid) @j-bm Madeesh Kannan @shadeMe Tony Kelman @tkelman Lee Killough @leekillough (Tactical Computing Labs) Mike Kistler @mkistler (IBM, Austin Research Laboratory) Nick Knight @nick-knight (SiFive) Ivan Korostelev @ivan23kor (University of Alberta) Kyungmin Lee @kyungminlee (Ohio State University) Michael Lehn @michael-lehn @leo4678 Shmuel Levine @ShmuelLevine @lschork2 Dave Love @loveshack Tze Meng Low (The University of Texas at Austin) Ye Luo @ye-luo (Argonne National Laboratory) Ricardo Magana @magania (Hewlett Packard Enterprise) Madan mohan Manokar @madanm3 (AMD) Giorgos Margaritis Bryan Marker @bamarker (The University of Texas at Austin) Simon Lukas Märtens @ACSimon33 (RWTH Aachen University) John Mather @jmather-sesi (SideFX Software) Devin Matthews @devinamatthews (The University of Texas at Austin) Stefanos Mavros @smavros Mithun Mohan @MithunMohanKadavil (AMD) @moon-chilled Ilknur Mustafazade @Runkli @nagsingh Bhaskar Nallani @BhaskarNallani (AMD) Stepan Nassyr @stepannassyr (Jülich Supercomputing Centre) Bart Oldeman @bartoldeman Nisanth M P @nisanthmp Nisanth Padinharepatt (AMD) Ajay Panyala @ajaypanyala Marc-Antoine Parent @maparent (Conversence) Devangi Parikh @dnparikh (The University of Texas at Austin) Elmar Peise @elmar-peise (RWTH-Aachen) Clément Pernet @ClementPernet Ilya Polkovnichenko Jack Poulson @poulson (Stanford) Mathieu Poumeyrol @kali Christos Psarras @ChrisPsa (RWTH Aachen University) @pkubaj @qnerd Michael Rader @mrader1248 Pradeep Rao @pradeeptrgit (AMD) @rmast Aleksei Rechinskii Leick Robinson @LeickR (Oracle) Karl Rupp @karlrupp Martin Schatz (The University of Texas at Austin) Nico Schlömer @nschloe Angelika Schwarz @angsch Rene Sitt Tony Skjellum @tonyskjellum (The University of Tennessee at Chattanooga) Mikhail Smelyanskiy (Intel, Parallel Computing Lab) Barry Smith @BarrySmith (Argonne National Laboratory) Nathaniel Smith @njsmith Shaden Smith @ShadenSmith Tyler Smith @tlrmchlsmth (The University of Texas at Austin) Edward Smyth @edwsmyth (AMD) Snehith @ArcadioN09 Paul Springer @springer13 (RWTH Aachen University) Adam J. Stewart @adamjstewart (University of Illinois at Urbana-Champaign) Vladimir Sukarev Harihara Sudhan S @ihariharasudhan (AMD) Chengguo Sun @chengguosun Christopher Taylor @ct-clmsn (Tactical Computing Labs) Santanu Thangaraj (AMD) Nicholai Tukanov @nicholaiTukanov (The University of Texas at Austin) Rhys Ulerich @RhysU (The University of Texas at Austin) Robert van de Geijn @rvdg (The University of Texas at Austin) Meghana Vankadari @Meghana-vankadari (AMD) Kiran Varaganti @kvaragan (AMD) Natalia Vassilieva (Hewlett Packard Enterprise) Andrew Wildman @awild82 (University of Washington) Zhang Xianyi @xianyi (Chinese Academy of Sciences) Benda Xu @heroxbd Guodong Xu @docularxu (Linaro.org) RuQing Xu @xrq-phys (The University of Tokyo) Srinivas Yadav @srinivasyadav18 Costas Yamin @cosstas Michael Yeh @myeh01 (SiFive) Chenhan Yu @ChenhanYu (The University of Texas at Austin) Roman Yurchak @rth (Symerio) Stefano Zampini @stefanozampini M. Zhou @cdluminate Igor Zhuravlov @jip (Far Eastern Federal University) @AngryLoki BLIS's development was partially funded by grants from industry partners, including AMD Hewlett Packard Enterprise Huawei Intel Microsoft Oracle Texas Instruments as well as the National Science Foundation (NSF Awards CCF-0917167, ACI-1148125/1340293, ACI-1550493, and CCF-1320112). blis-1.1/INSTALL000066400000000000000000000015671474157777200133730ustar00rootroot00000000000000 BLIS framework INSTALL --- For a detailed description of how to configure, compile, install, and link to a BLIS library on your local system, please read the build system documentation located in: docs/BuildSystem.md Note that the document's markdown content can be conveniently rendered by viewing the file over GitHub via a web browser: https://github.com/flame/blis/blob/master/docs/BuildSystem.md This document will always contain the most up-to-date information related to instantiating a BLIS library from the framework source code. If you have any further questions or wish to provide feedback, please contact the BLIS community by either by joining our Discord community! Instructions for joining may be found in: docs/Discord.md or in rendered form at: https://github.com/flame/blis/blob/master/docs/Discord.md Thanks for your interest in the BLIS framework! blis-1.1/LICENSE000066400000000000000000000040501474157777200133350ustar00rootroot00000000000000NOTE: Portions of this project's code are copyrighted by The University of Texas at Austin while other portions are copyrighted by Hewlett Packard Enterprise Development LP Advanced Micro Devices, Inc. Oracle Corporation with some overlap. Please see file-level license headers for file-specific copyright info. All parties provide their portions of the code under the 3-clause BSD license, found below. --- Copyright (C) 2012 - 2022, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Copyright (C) 2022, Oracle Corporation Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. blis-1.1/Makefile000066400000000000000000001505761474157777200140070ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2022, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Top-level makefile for libflame linear algebra library. # # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ libs libblis \ check-env check-env-mk check-env-fragments check-env-make-defs \ flat-header flat-cblas-header \ test \ testblas blastest-f2c blastest-bin blastest-run \ testsuite testsuite-bin \ testsuite-run testsuite-run-fast testsuite-run-md testsuite-run-salt \ testblis testblis-fast testblis-md testblis-salt \ check checkblas \ checkblis checkblis-fast checkblis-md checkblis-salt \ install-headers install-helper-headers install-libs install-lib-symlinks \ showconfig \ clean cleanmk cleanh cleanlib distclean \ cleantest cleanblastest cleanblistest \ changelog \ symbols \ install uninstall uninstall-old \ uninstall-libs uninstall-lib-symlinks uninstall-headers \ uninstall-old-libs uninstall-lib-symlinks uninstall-old-headers # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - We don't need to define DIST_PATH, LIB_PATH, INC_PATH, or SHARE_PATH since # the defaults in common.mk (and config.mk) are designed to work with the # top-level Makefile. #DIST_PATH := . #LIB_PATH = ./lib/$(CONFIG_NAME) #INC_PATH = ./include/$(CONFIG_NAME) #SHARE_PATH := . # # --- Include common makefile definitions -------------------------------------- # # Define the name of the common makefile. COMMON_MK_FILE := common.mk # Include the configuration file. -include $(COMMON_MK_FILE) # Detect whether we actually got the configuration file. If we didn't, then # it is likely that the user has not yet generated it (via configure). ifeq ($(strip $(COMMON_MK_INCLUDED)),yes) COMMON_MK_PRESENT := yes else COMMON_MK_PRESENT := no endif # # --- Main target variable definitions ----------------------------------------- # # --- Object file paths --- # Construct the base object file path for the current configuration. BASE_OBJ_PATH := ./$(OBJ_DIR)/$(CONFIG_NAME) # Construct base object file paths corresponding to the four locations # of source code. BASE_OBJ_CONFIG_PATH := $(BASE_OBJ_PATH)/$(CONFIG_DIR) BASE_OBJ_FRAME_PATH := $(BASE_OBJ_PATH)/$(FRAME_DIR) BASE_OBJ_REFKERN_PATH := $(BASE_OBJ_PATH)/$(REFKERN_DIR) BASE_OBJ_KERNELS_PATH := $(BASE_OBJ_PATH)/$(KERNELS_DIR) BASE_OBJ_ADDON_PATH := $(BASE_OBJ_PATH)/$(ADDON_DIR) BASE_OBJ_SANDBOX_PATH := $(BASE_OBJ_PATH)/$(SANDBOX_DIR) # --- Define install target names for static libraries --- LIBBLIS_A_INST := $(INSTALL_LIBDIR)/$(LIBBLIS_A) # --- Define install target names for shared libraries --- LIBBLIS_SO_INST := $(INSTALL_LIBDIR)/$(LIBBLIS_SO) LIBBLIS_SO_MAJ_INST := $(INSTALL_LIBDIR)/$(LIBBLIS_SONAME) ifeq ($(IS_WIN),yes) # The 'install' target does not create symlinks for Windows builds, so we don't # bother defining LIBBLIS_SO_MMB_INST. LIBBLIS_SO_MMB_INST := else LIBBLIS_SO_MMB_INST := $(INSTALL_LIBDIR)/$(LIBBLIS).$(LIBBLIS_SO_MMB_EXT) endif # --- Determine which libraries to build --- MK_LIBS := MK_LIBS_INST := MK_LIBS_SYML := ifeq ($(MK_ENABLE_STATIC),yes) MK_LIBS += $(LIBBLIS_A_PATH) MK_LIBS_INST += $(LIBBLIS_A_INST) MK_LIBS_SYML += endif ifeq ($(MK_ENABLE_SHARED),yes) MK_LIBS += $(LIBBLIS_SO_PATH) \ $(LIBBLIS_SO_MAJ_PATH) MK_LIBS_INST += $(LIBBLIS_SO_MMB_INST) MK_LIBS_SYML += $(LIBBLIS_SO_INST) \ $(LIBBLIS_SO_MAJ_INST) endif # Strip leading, internal, and trailing whitespace. MK_LIBS_INST := $(strip $(MK_LIBS_INST)) MK_LIBS_SYML := $(strip $(MK_LIBS_SYML)) # --- Define install directory for headers --- # Set the path to the subdirectory of the include installation directory. MK_INCL_DIR_INST := $(INSTALL_INCDIR)/blis # --- Define install directory for public makefile fragments --- # Set the path to the subdirectory of the share installation directory. MK_SHARE_DIR_INST := $(INSTALL_SHAREDIR)/blis PC_SHARE_DIR_INST := $(INSTALL_SHAREDIR)/pkgconfig # # --- Library object definitions ----------------------------------------------- # # In this section, we will isolate the relevant source code filepaths and # convert them to lists of object filepaths. Relevant source code falls into # four categories: configuration source; architecture-specific kernel source; # reference kernel source; and general framework source. # $(call gen-obj-paths-from-src file_exts, src_files, base_src_path, base_obj_path) gen-obj-paths-from-src = $(foreach ch, $(1), \ $(patsubst $(3)/%.$(ch), \ $(4)/%.o, \ $(filter %.$(ch), $(2)) ) ) # Generate object file paths for source code found in the sub-configuration # directories. MK_CONFIG_OBJS := $(call gen-obj-paths-from-src,$(CONFIG_SRC_SUFS),$(MK_CONFIG_SRC),$(CONFIG_PATH),$(BASE_OBJ_CONFIG_PATH)) # Generate object file paths for architecture-specific kernel source code. # We target only .c, .s, and .S files. Note that MK_KERNELS_SRC is already # limited to the kernel source corresponding to the kernel sets in # KERNEL_LIST. This is because the configure script only propogated makefile # fragments into those specific kernel subdirectories. MK_KERNELS_OBJS := $(call gen-obj-paths-from-src,$(KERNELS_SRC_SUFS),$(MK_KERNELS_SRC),$(KERNELS_PATH),$(BASE_OBJ_KERNELS_PATH)) # Generate object file paths for reference kernels, with one set of object # files for each sub-configuration in CONFIG_LIST. Note that due to the # nuances of naming the reference kernel files, we can't use the function # gen-obj-paths-from-src as we do above and below. MK_REFKERN_C := $(filter %.c, $(MK_REFKERN_SRC)) MK_REFKERN_OBJS := $(foreach arch, $(CONFIG_LIST), \ $(patsubst $(REFKERN_PATH)/%_$(REFNM).c, \ $(BASE_OBJ_REFKERN_PATH)/$(arch)/%_$(arch)_$(REFNM).o, \ $(MK_REFKERN_C) \ ) \ ) # Generate object file paths for all of the portable framework source code. MK_FRAME_OBJS := $(call gen-obj-paths-from-src,$(FRAME_SRC_SUFS),$(MK_FRAME_SRC),$(FRAME_PATH),$(BASE_OBJ_FRAME_PATH)) # Generate object file paths for the addon source code. If one or more addons # were not enabled a configure-time, these variable will we empty. # NOTE: We separate the source and objects into kernel and non-kernel lists. MK_ADDON_KERS_SRC := $(foreach addon, $(ADDON_LIST), \ $(filter $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \ $(MK_ADDON_SRC)) \ ) MK_ADDON_OTHER_SRC := $(foreach addon, $(ADDON_LIST), \ $(filter-out $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \ $(MK_ADDON_SRC)) \ ) MK_ADDON_KERS_OBJS := $(call gen-obj-paths-from-src,$(ADDON_SRC_SUFS),$(MK_ADDON_KERS_SRC),$(ADDON_PATH),$(BASE_OBJ_ADDON_PATH)) MK_ADDON_OTHER_OBJS := $(call gen-obj-paths-from-src,$(ADDON_SRC_SUFS),$(MK_ADDON_OTHER_SRC),$(ADDON_PATH),$(BASE_OBJ_ADDON_PATH)) MK_ADDON_OBJS := $(MK_ADDON_KERS_OBJS) $(MK_ADDON_OTHER_OBJS) # Generate object file paths for the sandbox source code. If a sandbox was not # enabled a configure-time, this variable will we empty. MK_SANDBOX_OBJS := $(call gen-obj-paths-from-src,$(SANDBOX_SRC_SUFS),$(MK_SANDBOX_SRC),$(SANDBOX_PATH),$(BASE_OBJ_SANDBOX_PATH)) # AMD has chosen to introduce AOCL-specific optimizations to certain BLIS # framework files that are otherwise intended to remain generic. Upstream # developers of vanilla BLIS have agreed to integrate some of these # optimizations, but in a way that keeps the AOCL-specific code segregated # in separate files containing the suffix '_amd'. For example, the BLAS # compatibility layer in vanilla BLIS contains a generic file named # 'bla_gemm.c'. AMD's version of this file is named 'bla_gemm_amd.c'. # Only one or the other is ever built and included in libblis. Currently, # these files are chosen automatically based on the target configuration. ifeq ($(ENABLE_AMD_FRAME_TWEAKS),yes) # Build is being done for AMD platforms; remove the objects which DO NOT have # an "_amd" suffix. MK_FRAME_AMD_OBJS := $(filter $(BASE_OBJ_FRAME_PATH)/%amd.o, $(MK_FRAME_OBJS)) FILES_TO_REMOVE := $(subst _amd.o,.o, $(MK_FRAME_AMD_OBJS)) MK_FRAME_OBJS := $(filter-out $(FILES_TO_REMOVE), $(MK_FRAME_OBJS)) else # Build is being done for non-AMD platforms; remove the objects which DO have # an "_amd" suffix. MK_FRAME_AMD_OBJS := $(filter $(BASE_OBJ_FRAME_PATH)/%amd.o, $(MK_FRAME_OBJS)) MK_FRAME_OBJS := $(filter-out $(MK_FRAME_AMD_OBJS), $(MK_FRAME_OBJS)) endif # Combine all of the object files into some readily-accessible variables. MK_BLIS_OBJS := $(MK_CONFIG_OBJS) \ $(MK_KERNELS_OBJS) \ $(MK_REFKERN_OBJS) \ $(MK_FRAME_OBJS) \ $(MK_ADDON_OBJS) \ $(MK_SANDBOX_OBJS) # Optionally filter out the BLAS and CBLAS compatibility layer object files. # This is not actually necessary, since each affected file is guarded by C # preprocessor macros, but it but prevents "empty" object files from being # added into the library (and reduces compilation time). BASE_OBJ_BLAS_PATH := $(BASE_OBJ_FRAME_PATH)/compat BASE_OBJ_CBLAS_PATH := $(BASE_OBJ_FRAME_PATH)/compat/cblas ifeq ($(MK_ENABLE_CBLAS),no) MK_BLIS_OBJS := $(filter-out $(BASE_OBJ_CBLAS_PATH)/%.o, $(MK_BLIS_OBJS) ) endif ifeq ($(MK_ENABLE_BLAS),no) MK_BLIS_OBJS := $(filter-out $(BASE_OBJ_BLAS_PATH)/%.o, $(MK_BLIS_OBJS) ) endif # # --- Monolithic header definitions -------------------------------------------- # # Define a list of headers to install. The default is to only install blis.h. HEADERS_TO_INSTALL := $(BLIS_H_FLAT) # If CBLAS is enabled, we also install cblas.h. This allows the user to continue # using #include "cblas.h" in their application, if they wish. (NOTE: Even if we # didn't install cblas.h, the user could *still* access CBLAS definitions and # function prototypes, but they would have to update their source code to use # #include "blis.h" instead of #include "cblas.h" since the latter header file # would not exist.) ifeq ($(MK_ENABLE_CBLAS),yes) HEADERS_TO_INSTALL += $(CBLAS_H_FLAT) endif # If requested, include AMD's C++ template header files in the list of headers # to install. ifeq ($(INSTALL_HH),yes) HEADERS_TO_INSTALL += $(wildcard $(VEND_CPP_PATH)/*.hh) endif # Define a list of so-called helper headers to install. These helper headers # are very simple headers that go one directory up from INCDIR/blis (which # by default is PREFIX/include/blis, where PREFIX is the install prefix). The # default is to only install the blis.h helper header. HELP_HEADERS_TO_INSTALL := $(HELP_BLIS_H_PATH) HELP_HEADERS_INSTALLED := $(INSTALL_INCDIR)/$(BLIS_H) # If CBLAS is enabled, we also install the cblas.h helper header. ifeq ($(MK_ENABLE_CBLAS),yes) HELP_HEADERS_TO_INSTALL += $(HELP_CBLAS_H_PATH) HELP_HEADERS_INSTALLED += $(INSTALL_INCDIR)/$(CBLAS_H) endif # # --- public makefile fragment definitions ------------------------------------- # # Define a list of makefile fragments to install. FRAGS_TO_INSTALL := $(CONFIG_MK_FILE) \ $(COMMON_MK_FILE) PC_IN_FILE := blis.pc.in PC_OUT_FILE := blis.pc # # --- BLAS test drivers definitions -------------------------------------------- # # The location of the BLAS test suite's input files. BLASTEST_INPUT_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/input # The location of the BLAS test suite object directory. BASE_OBJ_BLASTEST_PATH := $(BASE_OBJ_PATH)/$(BLASTEST_DIR) # The locations of the BLAS test suite source code (f2c and drivers). BLASTEST_F2C_SRC_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/f2c BLASTEST_DRV_SRC_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/src # The paths to object files we will create (f2c and drivers). BLASTEST_F2C_OBJS := $(sort \ $(patsubst $(BLASTEST_F2C_SRC_PATH)/%.c, \ $(BASE_OBJ_BLASTEST_PATH)/%.o, \ $(wildcard $(BLASTEST_F2C_SRC_PATH)/*.c)) \ ) BLASTEST_DRV_OBJS := $(sort \ $(patsubst $(BLASTEST_DRV_SRC_PATH)/%.c, \ $(BASE_OBJ_BLASTEST_PATH)/%.o, \ $(wildcard $(BLASTEST_DRV_SRC_PATH)/*.c)) \ ) # libf2c name and location. BLASTEST_F2C_LIB_NAME := libf2c.a BLASTEST_F2C_LIB := $(BASE_OBJ_BLASTEST_PATH)/$(BLASTEST_F2C_LIB_NAME) # The base names of each driver source file (ie: filename minus suffix). BLASTEST_DRV_BASES := $(basename $(notdir $(BLASTEST_DRV_OBJS))) # The binary executable driver names. BLASTEST_DRV_BINS := $(addsuffix .x,$(BLASTEST_DRV_BASES)) BLASTEST_DRV_BIN_PATHS := $(addprefix $(BASE_OBJ_BLASTEST_PATH)/,$(BLASTEST_DRV_BINS)) # Binary executable driver "run-" names BLASTEST_DRV_BINS_R := $(addprefix run-,$(BLASTEST_DRV_BASES)) # Filter level-1, level-2, and level-3 names to different variables. BLASTEST_DRV1_BASES := $(filter %1,$(BLASTEST_DRV_BASES)) BLASTEST_DRV2_BASES := $(filter %2,$(BLASTEST_DRV_BASES)) BLASTEST_DRV3_BASES := $(filter %3,$(BLASTEST_DRV_BASES)) # Define some CFLAGS that we'll only use when compiling BLAS test suite # files. BLAT_CFLAGS := -Wno-parentheses \ -I$(BLASTEST_F2C_SRC_PATH) \ -I. -DHAVE_BLIS_H # Suppress warnings about possibly uninitialized variables for the BLAS # test driver code (as output from f2c), which is riddled with such # variables, but only if the option to do so is supported. ifeq ($(CC_VENDOR),gcc) BLAT_CFLAGS += -Wno-maybe-uninitialized endif # The location of the script that checks the BLAS test output. BLASTEST_CHECK_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/$(BLASTEST_CHECK) # # --- BLIS testsuite definitions ----------------------------------------------- # # The location of the test suite's general and operations-specific # input/configuration files. TESTSUITE_CONF_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_CONF_GEN) TESTSUITE_CONF_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_CONF_OPS) TESTSUITE_FAST_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_FAST_GEN) TESTSUITE_FAST_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_FAST_OPS) TESTSUITE_MIXD_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_MIXD_GEN) TESTSUITE_MIXD_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_MIXD_OPS) TESTSUITE_SALT_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_SALT_GEN) TESTSUITE_SALT_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_SALT_OPS) # The locations of the test suite source directory and the local object # directory. TESTSUITE_SRC_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/src BASE_OBJ_TESTSUITE_PATH := $(BASE_OBJ_PATH)/$(TESTSUITE_DIR) # Convert source file paths to object file paths by replacing the base source # directories with the base object directories, and also replacing the source # file suffix (eg: '.c') with '.o'. MK_TESTSUITE_OBJS := $(sort \ $(patsubst $(TESTSUITE_SRC_PATH)/%.c, \ $(BASE_OBJ_TESTSUITE_PATH)/%.o, \ $(wildcard $(TESTSUITE_SRC_PATH)/*.c)) \ ) # The test suite binary executable filename. # NOTE: The TESTSUITE_WRAPPER variable defaults to the empty string if it # is not already set, in which case it has no effect lateron when the # testsuite binary is executed via lines such as # # $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) ... > $(TESTSUITE_OUT_FILE) # # The reason TESTSUITE_WRAPPER is employed in this way is so that some # unusual environments (e.g. ARM) can run the testsuite through some other # binary. See .travis.yml for details on how the variable is employed in # practice. TESTSUITE_BIN := test_$(LIBBLIS).x TESTSUITE_WRAPPER ?= # The location of the script that checks the BLIS testsuite output. TESTSUITE_CHECK_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_CHECK) # # --- Uninstall definitions ---------------------------------------------------- # ifeq ($(IS_CONFIGURED),yes) # These shell commands gather the filepaths to any library in the current # LIBDIR that might be left over from an old installation. We start with # including nothing for static libraries, since older static libraries are # always overwritten by newer ones. Then we add shared libraries, which are # named with three .so version numbers. UNINSTALL_OLD_LIBS := UNINSTALL_OLD_LIBS += $(filter-out $(INSTALL_LIBDIR)/$(LIBBLIS).$(LIBBLIS_SO_MMB_EXT),$(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS_SO).?.?.?)) # These shell commands gather the filepaths to any library symlink in the # current LIBDIR that might be left over from an old installation. We start # with symlinks named using the .so major version number. UNINSTALL_OLD_SYML := $(filter-out $(INSTALL_LIBDIR)/$(LIBBLIS_SO).$(SO_MAJOR),$(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS_SO).?)) # We also prepare to uninstall older-style symlinks whose names contain the # BLIS version number and configuration family. UNINSTALL_OLD_SYML += $(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS)-*.a) UNINSTALL_OLD_SYML += $(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS)-*.$(SHLIB_EXT)) # This shell command grabs all files named "*.h" that are not blis.h or cblas.h # in the installation directory. We consider this set of headers to be "old" and # eligible for removal upon running of the uninstall-old-headers target. UNINSTALL_OLD_HEADERS := $(filter-out $(BLIS_H),$(filter-out $(CBLAS_H),$(wildcard $(INSTALL_INCDIR)/blis/*.h))) endif # IS_CONFIGURED # # --- Targets/rules ------------------------------------------------------------ # # --- Primary targets --- all: libs libs: libblis test: checkblis checkblas check: checkblis-fast checkblas install: libs install-libs install-lib-symlinks install-headers install-share uninstall: uninstall-libs uninstall-lib-symlinks uninstall-headers uninstall-share uninstall-old: uninstall-old-libs uninstall-old-symlinks uninstall-old-headers clean: cleanh cleanlib # --- Environment check rules --- check-env: check-env-make-defs check-env-fragments check-env-mk check-env-mk: ifeq ($(CONFIG_MK_PRESENT),no) $(error Cannot proceed: config.mk not detected! Run configure first) endif check-env-fragments: check-env-mk ifeq ($(MAKEFILE_FRAGMENTS_PRESENT),no) $(error Cannot proceed: makefile fragments not detected! Run configure first) endif check-env-make-defs: check-env-fragments ifeq ($(ALL_MAKE_DEFS_MK_PRESENT),no) $(error Cannot proceed: Some make_defs.mk files not found or mislabeled!) endif # --- Shared/dynamic libblis symbol file creation/refresh --- symbols: check-env $(SYM_FILE) $(SYM_FILE): $(HEADERS_TO_INSTALL) ifeq ($(ENABLE_VERBOSE),yes) $(GEN_SYMS) > $(SYM_FILE) else @echo "Updating $(SYM_FILE)" @$(GEN_SYMS) > $(SYM_FILE) endif # --- Consolidated blis.h header creation --- flat-header: check-env $(BLIS_H_FLAT) $(BLIS_H_FLAT): $(ALL_H99_FILES) ifeq ($(ENABLE_VERBOSE),yes) $(FLATTEN_H) -l -v1 $(BLIS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" else @echo -n "Generating monolithic blis.h" @$(FLATTEN_H) -l -v1 $(BLIS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" @echo "Generated $@" endif # --- Consolidated cblas.h header creation --- flat-cblas-header: check-env $(CBLAS_H_FLAT) $(CBLAS_H_FLAT): $(FRAME_H99_FILES) ifeq ($(ENABLE_VERBOSE),yes) $(FLATTEN_H) -l -v1 $(CBLAS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" else @echo -n "Generating monolithic cblas.h" @$(FLATTEN_H) -l -v1 $(CBLAS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" @echo "Generated $@" endif # --- General source code / object code rules --- # FGVZ: Add support for compiling .s and .S files in 'config'/'kernels' # directories. # - May want to add an extra foreach loop around function eval/call. # first argument: a configuration name from config_list, used to look up the # CFLAGS to use during compilation. define make-config-rule $(BASE_OBJ_CONFIG_PATH)/$(1)/%.o: $(CONFIG_PATH)/$(1)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-config-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-config-text-for,$(1)) @$(CC) $(call get-config-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. define make-frame-rule $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-frame-text-for,$(1)) @$(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ endif ifneq ($(findstring hpx,$(THREADING_MODEL)),) $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.cpp $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CXX) $(call get-frame-cxxflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-frame-cxxtext-for,$(1)) @$(CXX) $(call get-frame-cxxflags-for,$(1)) -c $$< -o $$@ endif endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). define make-refinit-rule $(BASE_OBJ_REFKERN_PATH)/$(1)/bli_cntx_$(1)_ref.o: $(REFKERN_PATH)/bli_cntx_ref.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-refinit-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-refinit-text-for,$(1)) @$(CC) $(call get-refinit-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). define make-refkern-rule $(BASE_OBJ_REFKERN_PATH)/$(1)/%_$(1)_ref.o: $(REFKERN_PATH)/%_ref.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-refkern-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-refkern-text-for,$(1)) @$(CC) $(call get-refkern-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). # second argument: the configuration whose CFLAGS we should use in compilation. # third argument: the kernel file suffix being considered. define make-kernels-rule $(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.$(3) $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-kernel-cflags-for,$(2)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-kernel-text-for,$(2)) @$(CC) $(call get-kernel-cflags-for,$(2)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. # second argument: the C99 addon file suffix being considered. define make-c99-addon-rule $(BASE_OBJ_ADDON_PATH)/%.o: $(ADDON_PATH)/%.$(2) $(BLIS_H_FLAT) $(ADDON_H99_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-addon-c99flags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-addon-c99text-for,$(1)) @$(CC) $(call get-addon-c99flags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. # second argument: the C99 addon file suffix being considered. # third argument: the name of the addon being considered. define make-c99-addon-kers-rule $(BASE_OBJ_ADDON_PATH)/$(3)/$(KERNELS_DIR)/%.o: $(ADDON_PATH)/$(3)/$(KERNELS_DIR)/%.$(2) $(BLIS_H_FLAT) $(ADDON_H99_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-addon-kernel-c99flags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-addon-kernel-text-for,$(1)) @$(CC) $(call get-addon-kernel-c99flags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. # second argument: the C++ addon file suffix being considered. define make-cxx-addon-rule $(BASE_OBJ_ADDON_PATH)/%.o: $(ADDON_PATH)/%.$(2) $(BLIS_H_FLAT) $(ADDON_HXX_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-addon-cxxtext-for,$(1)) @$(CXX) $(call get-addon-cxxflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. # second argument: the C99 sandbox file suffix being considered. define make-c99-sandbox-rule $(BASE_OBJ_SANDBOX_PATH)/%.o: $(SANDBOX_PATH)/%.$(2) $(BLIS_H_FLAT) $(SANDBOX_H99_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-sandbox-c99flags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-sandbox-c99text-for,$(1)) @$(CC) $(call get-sandbox-c99flags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. # second argument: the C++ sandbox file suffix being considered. define make-cxx-sandbox-rule $(BASE_OBJ_SANDBOX_PATH)/%.o: $(SANDBOX_PATH)/%.$(2) $(BLIS_H_FLAT) $(SANDBOX_HXX_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CXX) $(call get-sandbox-cxxflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-sandbox-cxxtext-for,$(1)) @$(CXX) $(call get-sandbox-cxxflags-for,$(1)) -c $$< -o $$@ endif endef # Define functions to choose the correct sub-configuration name for the # given kernel set. This function is called when instantiating the # make-kernels-rule. get-config-for-kset = $(lastword $(subst :, ,$(filter $(1):%,$(KCONFIG_MAP)))) # Instantiate the build rule for files in the configuration directory for # each of the sub-configurations in CONFIG_LIST with the CFLAGS designated # for that sub-configuration. $(foreach conf, $(CONFIG_LIST), $(eval $(call make-config-rule,$(conf)))) # Instantiate the build rule for framework files. Use the CFLAGS for the # configuration family, which exists in the directory whose name is equal to # CONFIG_NAME. Note that this doesn't need to be in a loop since we expect # CONFIG_NAME to only ever contain a single name. (BTW: If CONFIG_NAME refers # to a singleton family, then CONFIG_LIST contains CONFIG_NAME as its only # item.) $(foreach conf, $(CONFIG_NAME), $(eval $(call make-frame-rule,$(conf)))) # Instantiate the build rule for reference kernel initialization and # reference kernels for each of the sub-configurations in CONFIG_LIST with # the CFLAGS designated for that sub-configuration. $(foreach conf, $(CONFIG_LIST), $(eval $(call make-refinit-rule,$(conf)))) $(foreach conf, $(CONFIG_LIST), $(eval $(call make-refkern-rule,$(conf)))) # Instantiate the build rule for optimized kernels for each of the kernel # sets in KERNEL_LIST with the CFLAGS designated for the sub-configuration # specified by the KCONFIG_MAP. $(foreach suf, $(KERNELS_SRC_SUFS), \ $(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),$(suf))))) # Instantiate the build rule for C addon files. Use the CFLAGS for the # configuration family. $(foreach suf, $(ADDON_C99_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-c99-addon-rule,$(conf),$(suf))))) # Instantiate the build rule for C addon/kernels files. Use the CFLAGS for the # configuration family. $(foreach addon, $(ADDON_LIST), \ $(foreach suf, $(ADDON_C99_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-c99-addon-kers-rule,$(conf),$(suf),$(addon)))))) # Instantiate the build rule for C++ addon files. Use the CFLAGS for the # configuration family. $(foreach suf, $(ADDON_CXX_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-cxx-addon-rule,$(conf),$(suf))))) # Instantiate the build rule for C sandbox files. Use the CFLAGS for the # configuration family. $(foreach suf, $(SANDBOX_C99_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-c99-sandbox-rule,$(conf),$(suf))))) # Instantiate the build rule for C++ sandbox files. Use the CXXFLAGS for the # configuration family. $(foreach suf, $(SANDBOX_CXX_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-cxx-sandbox-rule,$(conf),$(suf))))) # --- All-purpose library rule (static and shared) --- libblis: check-env $(MK_LIBS) # --- Static library archiver rules --- $(LIBBLIS_A_PATH): $(MK_BLIS_OBJS) ifeq ($(ENABLE_VERBOSE),yes) ifeq ($(ARG_MAX_HACK),yes) $(file > $@.in,$^) $(AR) $(ARFLAGS) $@ @$@.in $(RM_F) $@.in $(RANLIB) $@ else $(AR) $(ARFLAGS) $@ $? $(RANLIB) $@ endif else # ifeq ($(ENABLE_VERBOSE),no) ifeq ($(ARG_MAX_HACK),yes) @echo "Archiving $@" @$(file > $@.in,$^) @$(AR) $(ARFLAGS) $@ @$@.in @$(RM_F) $@.in @$(RANLIB) $@ else @echo "Archiving $@" @$(AR) $(ARFLAGS) $@ $? @$(RANLIB) $@ endif endif # --- Shared library linker rules --- $(LIBBLIS_SO_PATH): $(MK_BLIS_OBJS) ifeq ($(ENABLE_VERBOSE),yes) ifeq ($(ARG_MAX_HACK),yes) $(file > $@.in,$^) $(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) @$@.in $(LDFLAGS) $(RM_F) $@.in else $(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) $^ $(LDFLAGS) endif else # ifeq ($(ENABLE_VERBOSE),no) ifeq ($(ARG_MAX_HACK),yes) @echo "Dynamically linking $@" @$(file > $@.in,$^) @$(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) @$@.in $(LDFLAGS) @$(RM_F) $@.in else @echo "Dynamically linking $@" @$(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) $^ $(LDFLAGS) endif endif # Local symlink for shared library. # NOTE: We use a '.loc' suffix to avoid filename collisions in case this # rule is executed concurrently with the install-lib-symlinks rule, which # also creates symlinks in the current directory (before installing them). # NOTE: We don't create any symlinks during Windows builds. $(LIBBLIS_SO_MAJ_PATH): $(LIBBLIS_SO_PATH) ifeq ($(IS_WIN),no) ifeq ($(ENABLE_VERBOSE),yes) $(SYMLINK) $( out.$(1) else @echo "Running $(1).x > 'out.$(1)'" @$(TESTSUITE_WRAPPER) $(BASE_OBJ_BLASTEST_PATH)/$(1).x > out.$(1) endif endef # Instantiate the rule above for each level-1 driver file. $(foreach name, $(BLASTEST_DRV1_BASES), $(eval $(call make-run-blat1-rule,$(name)))) # A rule to run ?blat2.x and ?blat3.x driver files. define make-run-blat23-rule run-$(1): $(BASE_OBJ_BLASTEST_PATH)/$(1).x ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) $(BASE_OBJ_BLASTEST_PATH)/$(1).x < $(BLASTEST_INPUT_PATH)/$(1).in else @echo "Running $(1).x < '$(BLASTEST_INPUT_PATH)/$(1).in' (output to 'out.$(1)')" @$(TESTSUITE_WRAPPER) $(BASE_OBJ_BLASTEST_PATH)/$(1).x < $(BLASTEST_INPUT_PATH)/$(1).in endif endef # Instantiate the rule above for each level-2 driver file. $(foreach name, $(BLASTEST_DRV2_BASES), $(eval $(call make-run-blat23-rule,$(name)))) # Instantiate the rule above for each level-3 driver file. $(foreach name, $(BLASTEST_DRV3_BASES), $(eval $(call make-run-blat23-rule,$(name)))) # Check the results of the BLAS test suite drivers. checkblas: blastest-run ifeq ($(ENABLE_VERBOSE),yes) - $(BLASTEST_CHECK_PATH) else @- $(BLASTEST_CHECK_PATH) endif # --- BLIS test suite rules --- testblis: testsuite testblis-fast: testsuite-run-fast testblis-md: testsuite-run-md testblis-salt: testsuite-run-salt testsuite: testsuite-run testsuite-bin: check-env $(TESTSUITE_BIN) # Object file rule. $(BASE_OBJ_TESTSUITE_PATH)/%.o: $(TESTSUITE_SRC_PATH)/%.c $(BLIS_H_FLAT) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-user-cflags-for,$(CONFIG_NAME)) -c $< -o $@ else @echo "Compiling $@" @$(CC) $(call get-user-cflags-for,$(CONFIG_NAME)) -c $< -o $@ endif # Testsuite binary rule. $(TESTSUITE_BIN): $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ else @echo "Linking $@ against '$(LIBBLIS_LINK) "$(LDFLAGS)"'" @$(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ endif # A rule to run the testsuite using the normal input.* files. testsuite-run: testsuite-bin ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_CONF_GEN_PATH) \ -o $(TESTSUITE_CONF_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) with output redirected to '$(TESTSUITE_OUT_FILE)'" @$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_CONF_GEN_PATH) \ -o $(TESTSUITE_CONF_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) endif # A rule to run the testsuite using the input.*.fast files, which # run a set of tests designed to finish much more quickly. testsuite-run-fast: testsuite-bin ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_FAST_GEN_PATH) \ -o $(TESTSUITE_FAST_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) (fast) with output redirected to '$(TESTSUITE_OUT_FILE)'" @$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_FAST_GEN_PATH) \ -o $(TESTSUITE_FAST_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) endif # A rule to run the testsuite using the input.*.md files, which # run a set of tests designed to only exercise mixed-datatype gemm. testsuite-run-md: testsuite-bin ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_MIXD_GEN_PATH) \ -o $(TESTSUITE_MIXD_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) (mixed dt) with output redirected to '$(TESTSUITE_OUT_FILE)'" @$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_MIXD_GEN_PATH) \ -o $(TESTSUITE_MIXD_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) endif # A rule to run the testsuite using the input.*.salt files, which # simulates application-level threading across operation tests. testsuite-run-salt: testsuite-bin ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_SALT_GEN_PATH) \ -o $(TESTSUITE_SALT_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) (salt) with output redirected to '$(TESTSUITE_OUT_FILE)'" @$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_SALT_GEN_PATH) \ -o $(TESTSUITE_SALT_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) endif # Check the results of the BLIS testsuite. checkblis: testsuite-run ifeq ($(ENABLE_VERBOSE),yes) - $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) else @- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) endif # Check the results of the BLIS testsuite (fast). checkblis-fast: testsuite-run-fast ifeq ($(ENABLE_VERBOSE),yes) - $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) else @- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) endif # Check the results of the BLIS testsuite (mixed-datatype). checkblis-md: testsuite-run-md ifeq ($(ENABLE_VERBOSE),yes) - $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) else @- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) endif # Check the results of the BLIS testsuite (salt). checkblis-salt: testsuite-run-salt ifeq ($(ENABLE_VERBOSE),yes) - $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) else @- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE) endif # --- AMD's C++ template header test rules --- # NOTE: The targets below won't work as intended for an out-of-tree build, # and so it's disabled for now. #testcpp: testvendcpp # Recursively run the test for AMD's C++ template header. #testvendcpp: # $(MAKE) -C $(VEND_TESTCPP_PATH) # --- Install header rules --- install-headers: check-env $(MK_INCL_DIR_INST) install-helper-headers # Rule for installing main headers. $(MK_INCL_DIR_INST): $(HEADERS_TO_INSTALL) $(CONFIG_MK_FILE) ifeq ($(ENABLE_VERBOSE),yes) $(MKDIR) $(@) $(INSTALL) -m 0644 $(HEADERS_TO_INSTALL) $(@) else @$(MKDIR) $(@) @echo "Installing $(notdir $(HEADERS_TO_INSTALL)) into $(@)/" @$(INSTALL) -m 0644 $(HEADERS_TO_INSTALL) $(@) endif install-helper-headers: check-env $(HELP_HEADERS_INSTALLED) # A rule to install a helper header file. define make-helper-header-rule $(INSTALL_INCDIR)/$(notdir $(1)): $(BUILD_PATH)/$(notdir $(1)) $(CONFIG_MK_FILE) ifeq ($(ENABLE_VERBOSE),yes) $(MKDIR) $(INSTALL_INCDIR) $(INSTALL) -m 0644 $$(<) $$(@) else @$(MKDIR) $(INSTALL_INCDIR) @echo "Installing $$(@F) helper header into $(INSTALL_INCDIR)/" @$(INSTALL) -m 0644 $$(<) $$(@) endif endef # Instantiate the rule above for each helper header file to install. $(foreach h, $(HELP_HEADERS_TO_INSTALL), $(eval $(call make-helper-header-rule,$(h)))) # --- Install share rules --- install-share: check-env $(MK_SHARE_DIR_INST) $(PC_SHARE_DIR_INST) $(MK_SHARE_DIR_INST): $(FRAGS_TO_INSTALL) $(CONFIG_MK_FILE) ifeq ($(ENABLE_VERBOSE),yes) $(MKDIR) $(@) $(INSTALL) -m 0644 $(FRAGS_TO_INSTALL) $(@) $(MKDIR) -p $(@)/$(CONFIG_DIR)/$(CONFIG_NAME) $(INSTALL) -m 0644 $(CONFIG_DIR)/$(CONFIG_NAME)/$(MAKE_DEFS_FILE) \ $(@)/$(CONFIG_DIR)/$(CONFIG_NAME) else @$(MKDIR) $(@) @echo "Installing $(notdir $(FRAGS_TO_INSTALL)) into $(@)/" @$(INSTALL) -m 0644 $(FRAGS_TO_INSTALL) $(@) @$(MKDIR) -p $(@)/$(CONFIG_DIR)/$(CONFIG_NAME) @echo "Installing $(CONFIG_DIR)/$(CONFIG_NAME)/$(MAKE_DEFS_FILE) into $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)" @$(INSTALL) -m 0644 $(CONFIG_DIR)/$(CONFIG_NAME)/$(MAKE_DEFS_FILE) \ $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)/ endif $(PC_SHARE_DIR_INST): $(PC_IN_FILE) ifeq ($(ENABLE_VERBOSE),yes) $(MKDIR) $(@) $(shell cat "$(PC_IN_FILE)" \ | sed -e "s#@PACKAGE_VERSION@#$(VERSION)#g" \ | sed -e "s#@prefix@#$(prefix)#g" \ | sed -e "s#@exec_prefix@#$(exec_prefix)#g" \ | sed -e "s#@libdir@#$(libdir)#g" \ | sed -e "s#@includedir@#$(includedir)#g" \ | sed -e "s#@LDFLAGS@#$(LDFLAGS)#g" \ > "$(PC_OUT_FILE)" ) $(INSTALL) -m 0644 $(PC_OUT_FILE) $(@) else @$(MKDIR) $(@) @echo "Installing $(PC_OUT_FILE) into $(@)/" @$(shell cat "$(PC_IN_FILE)" \ | sed -e "s#@PACKAGE_VERSION@#$(VERSION)#g" \ | sed -e "s#@prefix@#$(prefix)#g" \ | sed -e "s#@exec_prefix@#$(exec_prefix)#g" \ | sed -e "s#@libdir@#$(libdir)#g" \ | sed -e "s#@includedir@#$(includedir)#g" \ | sed -e "s#@LDFLAGS@#$(LDFLAGS)#g" \ > "$(PC_OUT_FILE)" ) @$(INSTALL) -m 0644 $(PC_OUT_FILE) $(@) endif # --- Install library rules --- install-libs: check-env $(MK_LIBS_INST) # Install static library. $(INSTALL_LIBDIR)/%.a: $(BASE_LIB_PATH)/%.a $(CONFIG_MK_FILE) ifeq ($(ENABLE_VERBOSE),yes) $(MKDIR) $(@D) $(INSTALL) -m 0644 $< $@ else @echo "Installing $(@F) into $(INSTALL_LIBDIR)/" @$(MKDIR) $(@D) @$(INSTALL) -m 0644 $< $@ endif # Install shared library containing .so major, minor, and build versions. # Note: Installation rules for Windows does not include major, minor, and # build version numbers. ifeq ($(IS_WIN),no) # Linux/OSX library (.so OR .dylib) installation rules. $(INSTALL_LIBDIR)/%.$(LIBBLIS_SO_MMB_EXT): $(BASE_LIB_PATH)/%.$(SHLIB_EXT) $(CONFIG_MK_FILE) ifeq ($(ENABLE_VERBOSE),yes) $(MKDIR) $(@D) $(INSTALL) -m 0755 $< $@ else @echo "Installing $(@F) into $(INSTALL_LIBDIR)/" @$(MKDIR) $(@D) @$(INSTALL) -m 0755 $< $@ endif else # ifeq ($(IS_WIN),yes) # Windows library (.dll and .lib) installation rules. $(INSTALL_LIBDIR)/%.$(SHLIB_EXT): $(BASE_LIB_PATH)/%.$(SHLIB_EXT) ifeq ($(ENABLE_VERBOSE),yes) @$(MKDIR) $(@D) @$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@ else @echo "Installing $(@F) into $(INSTALL_LIBDIR)/" @$(MKDIR) $(@D) @$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@ endif $(INSTALL_LIBDIR)/%.$(LIBBLIS_SO_MAJ_EXT): $(BASE_LIB_PATH)/%.$(LIBBLIS_SO_MAJ_EXT) ifeq ($(ENABLE_VERBOSE),yes) @$(MKDIR) $(@D) @$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@ else @echo "Installing $(@F) into $(INSTALL_LIBDIR)/" @$(MKDIR) $(@D) @$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@ endif endif # ifeq ($(IS_WIN),no) # --- Install-symlinks rules --- install-lib-symlinks: check-env $(MK_LIBS_SYML) # Note: Symlinks are not installed on Windows. ifeq ($(IS_WIN),no) # Install generic shared library symlink. $(INSTALL_LIBDIR)/%.$(SHLIB_EXT): $(INSTALL_LIBDIR)/%.$(LIBBLIS_SO_MMB_EXT) ifeq ($(ENABLE_VERBOSE),yes) $(SYMLINK) $( $(DIST_PATH)/$(CHANGELOG) # --- Uninstall rules --- # NOTE: We can't write these uninstall rules directly in terms of targets # $(MK_LIBS_VERS_CONF_INST), $(MK_LIBS_INST), and $(MK_INCL_DIR_INST) # because those targets are already defined in terms of rules that *build* # those products. uninstall-libs: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_F) $(MK_LIBS_INST) else @echo "Uninstalling libraries $(notdir $(MK_LIBS_INST)) from $(dir $(firstword $(MK_LIBS_INST)))" @- $(RM_F) $(MK_LIBS_INST) endif uninstall-lib-symlinks: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_F) $(MK_LIBS_SYML) else @echo "Uninstalling symlinks $(notdir $(MK_LIBS_SYML)) from $(dir $(firstword $(MK_LIBS_SYML)))" @- $(RM_F) $(MK_LIBS_SYML) endif uninstall-headers: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_RF) $(MK_INCL_DIR_INST) - $(RM_RF) $(HELP_HEADERS_INSTALLED) else @echo "Uninstalling directory '$(notdir $(MK_INCL_DIR_INST))' from $(dir $(MK_INCL_DIR_INST))" @- $(RM_RF) $(MK_INCL_DIR_INST) @echo "Uninstalling $(notdir $(HELP_HEADERS_INSTALLED)) from $(dir $(INSTALL_INCDIR))" @- $(RM_RF) $(HELP_HEADERS_INSTALLED) endif uninstall-share: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_RF) $(MK_SHARE_DIR_INST) else @echo "Uninstalling directory '$(notdir $(MK_SHARE_DIR_INST))' from $(dir $(MK_SHARE_DIR_INST))" @- $(RM_RF) $(MK_SHARE_DIR_INST) endif # --- Uninstall old rules --- uninstall-old-libs: $(UNINSTALL_OLD_LIBS) check-env uninstall-old-symlinks: $(UNINSTALL_OLD_SYML) check-env uninstall-old-headers: $(UNINSTALL_OLD_HEADERS) check-env $(UNINSTALL_OLD_LIBS) $(UNINSTALL_OLD_SYML) $(UNINSTALL_OLD_HEADERS): check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_F) $@ else @echo "Uninstalling $(@F) from $(@D)/" @- $(RM_F) $@ endif blis-1.1/README.md000066400000000000000000001341041474157777200136130ustar00rootroot00000000000000_Recipient of the **[2023 James H. Wilkinson Prize for Numerical Software](https://www.siam.org/prizes-recognition/major-prizes-lectures/detail/james-h-wilkinson-prize-for-numerical-software)**_ _Recipient of the **[2020 SIAM Activity Group on Supercomputing Best Paper Prize](https://www.siam.org/prizes-recognition/activity-group-prizes/detail/siag-sc-best-paper-prize)**_ ![The BLIS cat is sleeping.](http://www.cs.utexas.edu/users/field/blis_cat.png) [![Build Status](https://api.travis-ci.com/flame/blis.svg?branch=master)](https://app.travis-ci.com/github/flame/blis) [![Build Status](https://ci.appveyor.com/api/projects/status/github/flame/blis?branch=master&svg=true)](https://ci.appveyor.com/project/shpc/blis/branch/master) [Discord logo](docs/Discord.md) Contents -------- * **[Introduction](#introduction)** * **[Education and Learning](#education-and-learning)** * **[What's New](#whats-new)** * **[What People Are Saying About BLIS](#what-people-are-saying-about-blis)** * **[Key Features](#key-features)** * **[How to Download BLIS](#how-to-download-blis)** * **[Getting Started](#getting-started)** * **[Example Code](#example-code)** * **[Documentation](#documentation)** * **[Performance](#performance)** * **[External Packages](#external-packages)** * **[Discussion](#discussion)** * **[Contributing](#contributing)** * **[Citations](#citations)** * **[Awards](#awards)** * **[Funding](#funding)** Introduction ------------ BLIS is an [award-winning](#awards) portable software framework for instantiating high-performance BLAS-like dense linear algebra libraries. The framework was designed to isolate essential kernels of computation that, when optimized, immediately enable optimized implementations of most of its commonly used and computationally intensive operations. BLIS is written in [ISO C99](http://en.wikipedia.org/wiki/C99) and available under a [new/modified/3-clause BSD license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a [new BLAS-like API](docs/BLISTypedAPI.md), it also includes a BLAS compatibility layer which gives application developers access to BLIS implementations via traditional [BLAS routine calls](http://www.netlib.org/lapack/lug/node145.html). An [object-based API](docs/BLISObjectAPI.md) unique to BLIS is also available. For a thorough presentation of our framework, please read our [ACM Transactions on Mathematical Software (TOMS)](https://toms.acm.org/) journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS Functionality"](http://dl.acm.org/authorize?N91172). For those who just want an executive summary, please see the [Key Features](#key-features) section below. In a follow-up article (also in [ACM TOMS](https://toms.acm.org/)), ["The BLIS Framework: Experiments in Portability"](http://dl.acm.org/authorize?N16240), we investigate using BLIS to instantiate level-3 BLAS implementations on a variety of general-purpose, low-power, and multicore architectures. An IPDPS'14 conference paper titled ["Anatomy of High-Performance Many-Threaded Matrix Multiplication"](http://www.cs.utexas.edu/users/flame/pubs/blis3_ipdps14.pdf) systematically explores the opportunities for parallelism within the five loops that BLIS exposes in its matrix multiplication algorithm. For other papers related to BLIS, please see the [Citations section](#citations) below. It is our belief that BLIS offers substantial benefits in productivity when compared to conventional approaches to developing BLAS libraries, as well as a much-needed refinement of the BLAS interface, and thus constitutes a major advance in dense linear algebra computation. While BLIS remains a work-in-progress, we are excited to continue its development and further cultivate its use within the community. The BLIS framework is primarily developed and maintained by individuals in the [Science of High-Performance Computing](http://shpc.ices.utexas.edu/) (SHPC) group in the [Oden Institute for Computational Engineering and Sciences](https://www.oden.utexas.edu/) at [The University of Texas at Austin](https://www.utexas.edu/) and in the [Matthews Research Group](https://matthewsresearchgroup.webstarts.com/) at [Southern Methodist University](https://www.smu.edu/). Please visit the [SHPC](http://shpc.ices.utexas.edu/) website for more information about our research group, such as a list of [people](http://shpc.ices.utexas.edu/people.html) and [collaborators](http://shpc.ices.utexas.edu/collaborators.html), [funding sources](http://shpc.ices.utexas.edu/funding.html), [publications](http://shpc.ices.utexas.edu/publications.html), and [other educational projects](http://www.ulaff.net/) (such as MOOCs). Education and Learning ---------------------- Want to understand what's under the hood? Many of the same concepts and principles employed when developing BLIS are introduced and taught in a basic pedagogical setting as part of [LAFF-On Programming for High Performance (LAFF-On-PfHP)](http://www.ulaff.net/), one of several massive open online courses (MOOCs) in the [Linear Algebra: Foundations to Frontiers](http://www.ulaff.net/) series, all of which are available for free via the [edX platform](http://www.edx.org/). What's New ---------- * **BLIS selected for the 2023 James H. Wilkinson Prize for Numerical Software!** We are thrilled to announce that Field Van Zee and Devin Matthews were chosen to receive the [2023 James H. Wilkinson Prize for Numerical Software](https://www.siam.org/prizes-recognition/major-prizes-lectures/detail/james-h-wilkinson-prize-for-numerical-software). The selection committee sought to recognize the recipients "for the development of BLIS, a portable open-source software framework that facilitates rapid instantiation of high-performance BLAS and BLAS-like operations targeting modern CPUs." This prize is awarded once every four years to the authors of an outstanding piece of numerical software, or to individuals who have made an outstanding contribution to an existing piece of numerical software. It is awarded to an entry that best addresses all phases of the preparation of high-quality numerical software, and is intended to recognize innovative software in scientific computing and to encourage researchers in the earlier stages of their career. The prize will be awarded at the [2023 SIAM Conference on Computational Science and Engineering](https://www.siam.org/conferences/cm/conference/cse23) in Amsterdam. * **Join us on Discord!** In 2021, we soft-launched our [Discord](https://discord.com/) server by privately inviting current and former collaborators, attendees of our BLIS Retreat, as well as other participants within the BLIS ecosystem. We've been thrilled by the results thus far, and are happy to announce that our new community is now open to the broader public! If you'd like to hang out with other BLIS users and developers, ask a question, discuss future features, or just say hello, please feel free to join us! We've put together a [step-by-step guide](docs/Discord.md) for creating an account and joining our cozy enclave. We even have a monthly "BLIS happy hour" event where people can casually come together for a video chat, Q&A, brainstorm session, or whatever it happens to unfold into! * **Addons feature now available!** Have you ever wanted to quickly extend BLIS's operation support or define new custom BLIS APIs for your application, but were unsure of how to add your source code to BLIS? Do you want to isolate your custom code so that it only gets enabled when the user requests it? Do you like [sandboxes](docs/Sandboxes.md), but wish you didn't have to provide an implementation of `gemm`? If so, you should check out our new [addons](docs/Addons.md) feature. Addons act like optional extensions that can be created, enabled, and combined to suit your application's needs, all without formally integrating your code into the core BLIS framework. * **Multithreaded small/skinny matrix support for sgemm now available!** Thanks to funding and hardware support from Oracle, we have now accelerated `gemm` for single-precision real matrix problems where one or two dimensions is exceedingly small. This work is similar to the `gemm` optimization announced last year. For now, we have only gathered performance results on an AMD Epyc Zen2 system, but we hope to publish additional graphs for other architectures in the future. You may find these Zen2 graphs via the [PerformanceSmall](docs/PerformanceSmall.md) document. * **BLIS awarded SIAM Activity Group on Supercomputing Best Paper Prize for 2020!** We are thrilled to announce that the paper that we internally refer to as the second BLIS paper, "The BLIS Framework: Experiments in Portability." Field G. Van Zee, Tyler Smith, Bryan Marker, Tze Meng Low, Robert A. van de Geijn, Francisco Igual, Mikhail Smelyanskiy, Xianyi Zhang, Michael Kistler, Vernon Austel, John A. Gunnels, Lee Killough. ACM Transactions on Mathematical Software (TOMS), 42(2):12:1--12:19, 2016. was selected for the [SIAM Activity Group on Supercomputing Best Paper Prize](https://www.siam.org/prizes-recognition/activity-group-prizes/detail/siag-sc-best-paper-prize) for 2020. The prize is awarded once every two years to a paper judged to be the most outstanding paper in the field of parallel scientific and engineering computing, and has only been awarded once before (in 2016) since its inception in 2015 (the committee did not award the prize in 2018). The prize [was awarded](https://www.oden.utexas.edu/about/news/ScienceHighPerfomanceComputingSIAMBestPaperPrize/) at the [2020 SIAM Conference on Parallel Processing for Scientific Computing](https://www.siam.org/conferences/cm/conference/pp20) in Seattle. Robert was present at the conference to give [a talk on BLIS](https://meetings.siam.org/sess/dsp_programsess.cfm?SESSIONCODE=68266) and accept the prize alongside other coauthors. The selection committee sought to recognize the paper, "which validates BLIS, a framework relying on the notion of microkernels that enables both productivity and high performance." Their statement continues, "The framework will continue having an important influence on the design and the instantiation of dense linear algebra libraries." * **Multithreaded small/skinny matrix support for dgemm now available!** Thanks to contributions made possible by our partnership with AMD, we have dramatically accelerated `gemm` for double-precision real matrix problems where one or two dimensions is exceedingly small. A natural byproduct of this optimization is that the traditional case of small _m = n = k_ (i.e. square matrices) is also accelerated, even though it was not targeted specifically. And though only `dgemm` was optimized for now, support for other datatypes and/or other operations may be implemented in the future. We've also added new graphs to the [PerformanceSmall](docs/PerformanceSmall.md) document to showcase multithreaded performance when one or more matrix dimensions are small. * **Performance comparisons now available!** We recently measured the performance of various level-3 operations on a variety of hardware architectures, as implemented within BLIS and other BLAS libraries for all four of the standard floating-point datatypes. The results speak for themselves! Check out our extensive performance graphs and background info in our new [Performance](docs/Performance.md) document. * **BLIS is now in Debian Unstable!** Thanks to Debian developer-maintainers [M. Zhou](https://github.com/cdluminate) and [Nico Schlömer](https://github.com/nschloe) for sponsoring our package in Debian. Their participation, contributions, and advocacy were key to getting BLIS into the second-most popular Linux distribution (behind Ubuntu, which Debian packages feed into). The Debian tracker page may be found [here](https://tracker.debian.org/pkg/blis). * **BLIS now supports mixed-datatype gemm!** The `gemm` operation may now be executed on operands of mixed domains and/or mixed precisions. Any combination of storage datatype for A, B, and C is now supported, along with a separate computation precision that can differ from the storage precision of A and B. And even the 1m method now supports mixed-precision computation. For more details, please see our [ACM TOMS](https://toms.acm.org/) journal article submission ([current draft](http://www.cs.utexas.edu/users/flame/pubs/blis7_toms_rev0.pdf)). * **BLIS now implements the 1m method.** Let's face it: writing complex assembly `gemm` microkernels for a new architecture is never a priority--and now, it almost never needs to be. The 1m method leverages existing real domain `gemm` microkernels to implement all complex domain level-3 operations. For more details, please see our [ACM TOMS](https://toms.acm.org/) journal article submission ([current draft](http://www.cs.utexas.edu/users/flame/pubs/blis6_toms_rev2.pdf)). What People Are Saying About BLIS --------------------------------- *["I noticed a substantial increase in multithreaded performance on my own machine, which was extremely satisfying."](https://groups.google.com/d/msg/blis-discuss/8iu9B5KCxpA/uftpjgIsBwAJ)* ... *["[I was] happy it worked so well!"](https://groups.google.com/d/msg/blis-discuss/8iu9B5KCxpA/uftpjgIsBwAJ)* (Justin Shea) *["This is an awesome library."](https://github.com/flame/blis/issues/288#issuecomment-447488637)* ... *["I want to thank you and the blis team for your efforts."](https://github.com/flame/blis/issues/288#issuecomment-448074704)* ([@Lephar](https://github.com/Lephar)) *["Any time somebody outside Intel beats MKL by a nontrivial amount, I report it to the MKL team. It is fantastic for any open-source project to get within 10% of MKL... [T]his is why Intel funds BLIS development."](https://github.com/flame/blis/issues/264#issuecomment-428673275)* ([@jeffhammond](https://github.com/jeffhammond)) *["So BLIS is now a part of Elk."](https://github.com/flame/blis/issues/267#issuecomment-429303902)* ... *["We have found that zgemm applied to a 15000x15000 matrix with multi-threaded BLIS on a 32-core Ryzen 2990WX processor is about twice as fast as MKL"](https://github.com/flame/blis/issues/264#issuecomment-428373946)* ... *["I'm starting to like this a lot."](https://github.com/flame/blis/issues/264#issuecomment-428926191)* ([@jdk2016](https://github.com/jdk2016)) *["I [found] BLIS because I was looking for BLAS operations on C-ordered arrays for NumPy. BLIS has that, but even better is the fact that it's developed in the open using a more modern language than Fortran."](https://github.com/flame/blis/issues/254#issuecomment-423838345)* ([@nschloe](https://github.com/nschloe)) *["The specific reason to have BLIS included [in Linux distributions] is the KNL and SKX [AVX-512] BLAS support, which OpenBLAS doesn't have."](https://github.com/flame/blis/issues/210#issuecomment-393126303)* ([@loveshack](https://github.com/loveshack)) *["All tests pass without errors on OpenBSD. Thanks!"](https://github.com/flame/blis/issues/202#issuecomment-389691543)* ([@ararslan](https://github.com/ararslan)) *["Thank you very much for your great help!... Looking forward to benchmarking."](https://github.com/flame/blis/issues/180#issuecomment-375895449)* ([@mrader1248](https://github.com/mrader1248)) *["Thanks for the beautiful work."](https://github.com/flame/blis/issues/163#issue-286575452)* ([@mmrmo](https://github.com/mmrmo)) *["[M]y software currently uses BLIS for its BLAS interface..."](https://github.com/flame/blis/issues/129#issuecomment-302904805)* ([@ShadenSmith](https://github.com/ShadenSmith)) *["[T]hanks so much for your work on this! Excited to test."](https://github.com/flame/blis/issues/129#issuecomment-341565071)* ... *["[On AMD Excavator], BLIS is competitive to / slightly faster than OpenBLAS for dgemms in my tests."](https://github.com/flame/blis/issues/129#issuecomment-341608673)* ([@iotamudelta](https://github.com/iotamudelta)) *["BLIS provided the only viable option on KNL, whose ecosystem is at present dominated by blackbox toolchains. Thanks again. Keep on this great work."](https://github.com/flame/blis/issues/116#issuecomment-281225101)* ([@heroxbd](https://github.com/heroxbd)) *["I want to definitely try this out..."](https://github.com/flame/blis/issues/12#issuecomment-48086295)* ([@ViralBShah](https://github.com/ViralBShah)) Key Features ------------ BLIS offers several advantages over traditional BLAS libraries: * **Portability that doesn't impede high performance.** Portability was a top priority of ours when creating BLIS. With virtually no additional effort on the part of the developer, BLIS is configurable as a fully-functional reference implementation. But more importantly, the framework identifies and isolates a key set of computational kernels which, when optimized, immediately and automatically optimize performance across virtually all level-2 and level-3 BLIS operations. In this way, the framework acts as a productivity multiplier. And since the optimized (non-portable) code is compartmentalized within these few kernels, instantiating a high-performance BLIS library on a new architecture is a relatively straightforward endeavor. * **Generalized matrix storage.** The BLIS framework exports interfaces that allow one to specify both the row stride and column stride of a matrix. This allows one to compute with matrices stored in column-major order, row-major order, or by general stride. (This latter storage format is important for those seeking to implement tensor contractions on multidimensional arrays.) Furthermore, since BLIS tracks stride information for each matrix, operands of different storage formats can be used within the same operation invocation. By contrast, BLAS requires column-major storage. And while the CBLAS interface supports row-major storage, it does not allow mixing storage formats. * **Rich support for the complex domain.** BLIS operations are developed and expressed in their most general form, which is typically in the complex domain. These formulations then simplify elegantly down to the real domain, with conjugations becoming no-ops. Unlike the BLAS, all input operands in BLIS that allow transposition and conjugate-transposition also support conjugation (without transposition), which obviates the need for thread-unsafe workarounds. Also, where applicable, both complex symmetric and complex Hermitian forms are supported. (BLAS omits some complex symmetric operations, such as `symv`, `syr`, and `syr2`.) Another great example of BLIS serving as a portability lever is its implementation of the 1m method for complex matrix multiplication, a novel mechanism of providing high-performance complex level-3 operations using only real domain microkernels. This new innovation guarantees automatic level-3 support in the complex domain even when the kernel developers entirely forgo writing complex kernels. * **Advanced multithreading support.** BLIS allows multiple levels of symmetric multithreading for nearly all level-3 operations. (Currently, users may choose to obtain parallelism via OpenMP, POSIX threads, or HPX). This means that matrices may be partitioned in multiple dimensions simultaneously to attain scalable, high-performance parallelism on multicore and many-core architectures. The key to this innovation is a thread-specific control tree infrastructure which encodes information about the logical thread topology and allows threads to query and communicate data amongst one another. BLIS also employs so-called "quadratic partitioning" when computing dimension sub-ranges for each thread, so that arbitrary diagonal offsets of structured matrices with unreferenced regions are taken into account to achieve proper load balance. More recently, BLIS introduced a runtime abstraction to specify parallelism on a per-call basis, which is useful for applications that want to handle most of the parallelism. * **Ease of use.** The BLIS framework, and the library of routines it generates, are easy to use for end users, experts, and vendors alike. An optional BLAS compatibility layer provides application developers with backwards compatibility to existing BLAS-dependent codes. Or, one may adjust or write their application to take advantage of new BLIS functionality (such as generalized storage formats or additional complex operations) by calling one of BLIS's native APIs directly. BLIS's typed API will feel familiar to many veterans of BLAS since these interfaces use BLAS-like calling sequences. And many will find BLIS's object-based APIs a delight to use when customizing or writing their own BLIS operations. (Objects are relatively lightweight `structs` and passed by address, which helps tame function calling overhead.) * **Multilayered API and exposed kernels.** The BLIS framework exposes its implementations in various layers, allowing expert developers to access exactly the functionality desired. This layered interface includes that of the lowest-level kernels, for those who wish to bypass the bulk of the framework. Optimizations can occur at various levels, in part thanks to exposed packing and unpacking facilities, which by default are highly parameterized and flexible. * **Functionality that grows with the community's needs.** As its name suggests, the BLIS framework is not a single library or static API, but rather a nearly-complete template for instantiating high-performance BLAS-like libraries. Furthermore, the framework is extensible, allowing developers to leverage existing components to support new operations as they are identified. If such operations require new kernels for optimal efficiency, the framework and its APIs will be adjusted and extended accordingly. Community developers who wish to experiment with creating new operations or APIs in BLIS can quickly and easily do so via the [Addons](docs/Addons.md) feature. * **Code re-use.** Auto-generation approaches to achieving the aforementioned goals tend to quickly lead to code bloat due to the multiple dimensions of variation supported: operation (i.e. `gemm`, `herk`, `trmm`, etc.); parameter case (i.e. side, [conjugate-]transposition, upper/lower storage, unit/non-unit diagonal); datatype (i.e. single-/double-precision real/complex); matrix storage (i.e. row-major, column-major, generalized); and algorithm (i.e. partitioning path and kernel shape). These "brute force" approaches often consider and optimize each operation or case combination in isolation, which is less than ideal when the goal is to provide entire libraries. BLIS was designed to be a complete framework for implementing basic linear algebra operations, but supporting this vast amount of functionality in a manageable way required a holistic design that employed careful abstractions, layering, and recycling of generic (highly parameterized) codes, subject to the constraint that high performance remain attainable. * **A foundation for mixed domain and/or mixed precision operations.** BLIS was designed with the hope of one day allowing computation on real and complex operands within the same operation. Similarly, we wanted to allow mixing operands' numerical domains, floating-point precisions, or both domain and precision, and to optionally compute in a precision different than one or both operands' storage precisions. This feature has been implemented for the general matrix multiplication (`gemm`) operation, providing 128 different possible type combinations, which, when combined with existing transposition, conjugation, and storage parameters, enables 55,296 different `gemm` use cases. For more details, please see the documentation on [mixed datatype](docs/MixedDatatypes.md) support and/or our [ACM TOMS](https://toms.acm.org/) journal paper on mixed-domain/mixed-precision `gemm` ([linked below](#citations)). How to Download BLIS -------------------- There are a few ways to download BLIS. We list the most common four ways below. We **highly recommend** using either Option 1 or 2. Otherwise, we recommend Option 3 (over Option 4) so your compiler can perform optimizations specific to your hardware. 1. **Download a source repository with `git clone`.** Generally speaking, we prefer using `git clone` to clone a `git` repository. Having a repository allows the user to periodically pull in the latest changes and quickly rebuild BLIS whenever they wish. Also, implicit in cloning a repository is that the repository defaults to using the `master` branch, which contains the latest "stable" commits since the most recent release. (This is in contrast to Option 3 in which the user is opting for code that may be slightly out of date.) In order to clone a `git` repository of BLIS, please obtain a repository URL by clicking on the green button above the file/directory listing near the top of this page (as rendered by GitHub). Generally speaking, it will amount to executing the following command in your terminal shell: ``` git clone https://github.com/flame/blis.git ``` At this point, you will have the latest commit of the `master` branch checked out. If you wish to check out a particular version x.y.z, execute the following: ``` git checkout x.y.z ``` `git` will then transform your working copy to match the state of the commit associated with version x.y.z. You can view a list of tags at any time by executing: ``` git tag --list ``` 2. **Download a source repository via a zip file.** If you are uncomfortable with using `git` but would still like the latest stable commits, we recommend that you download BLIS as a zip file. In order to download a zip file of the BLIS source distribution, please click on the green button above the file listing near the top of this page. This should reveal a link for downloading the zip file. 3. **Download a source release via a tarball/zip file.** Alternatively, if you would like to stick to the code that is included in official releases, you may download either a tarball or zip file of BLIS's latest [release](https://github.com/flame/blis/releases). Some older releases are only available as [tagged](https://github.com/flame/blis/tags) commits. (Note: downloading release x.y.z is equivalent to downloading, or checking out, tag `x.y.z`.) We consider this option to be less than ideal for most people since it will likely mean you miss out on the latest bugfix or feature commits (in contrast to Options 1 or 2), and you also will not be able to update your code with a simple `git pull` command (in contrast to Option 1). 4. **Download a binary package specific to your OS.** While we don't recommend this as the first choice for most users, we provide links to community members who generously maintain BLIS packages for various Linux distributions such as Debian Unstable and EPEL/Fedora. Please see the [External Packages](#external-packages) section below for more information. Getting Started --------------- *NOTE: This section assumes you've either cloned a BLIS source code repository via `git`, downloaded the latest source code via a zip file, or downloaded the source code for a tagged version release---Options 1, 2, or 3, respectively, as discussed in [the previous section](#how-to-download-blis).* If you just want to build a sequential (not parallelized) version of BLIS in a hurry and come back and explore other topics later, you can configure and build BLIS as follows: ``` $ ./configure auto $ make [-j] ``` You can then verify your build by running BLAS- and BLIS-specific test drivers via `make check`: ``` $ make check [-j] ``` And if you would like to install BLIS to the directory specified to `configure` via the `--prefix` option, run the `install` target: ``` $ make install ``` Please read the output of `./configure --help` for a full list of configure-time options. If/when you have time, we *strongly* encourage you to read the detailed walkthrough of the build system found in our [Build System](docs/BuildSystem.md) guide. If you are still having trouble, you are welcome to [join us on Discord](docs/Discord.md) for further information and/or assistance. Example Code ------------ The BLIS source distribution provides example code in the `examples` directory. Example code focuses on using BLIS APIs (not BLAS or CBLAS), and resides in two subdirectories: [examples/oapi](examples/oapi) (which demonstrates the [object API](docs/BLISObjectAPI.md)) and [examples/tapi](examples/tapi) (which demonstrates the [typed API](docs/BLISTypedAPI.md)). Either directory contains several files, each containing various pieces of code that exercise core functionality of the BLIS API in question (object or typed). These example files should be thought of collectively like a tutorial, and therefore it is recommended to start from the beginning (the file that starts in `00`). You can build all of the examples by simply running `make` from either example subdirectory (`examples/oapi` or `examples/tapi`). (You can also run `make clean`.) The local `Makefile` assumes that you've already configured and built (but not necessarily installed) BLIS two directories up, in `../..`. If you have already installed BLIS to some permanent directory, you may refer to that installation by setting the environment variable `BLIS_INSTALL_PATH` prior to running make: ``` export BLIS_INSTALL_PATH=/usr/local; make ``` or by setting the same variable as part of the make command: ``` make BLIS_INSTALL_PATH=/usr/local ``` **Once the executable files have been built, we recommend reading the code and the corresponding executable output side by side. This will help you see the effects of each section of code.** This tutorial is not exhaustive or complete; several object API functions were omitted (mostly for brevity's sake) and thus more examples could be written. Documentation ------------- We provide extensive documentation on the BLIS build system, APIs, test infrastructure, and other important topics. All documentation is formatted in markdown and included in the BLIS source distribution (usually in the `docs` directory). Slightly longer descriptions of each document may be found via in the project's [wiki](https://github.com/flame/blis/wiki) section. **Documents for everyone:** * **[Build System](docs/BuildSystem.md).** This document covers the basics of configuring and building BLIS libraries, as well as related topics. * **[Testsuite](docs/Testsuite.md).** This document describes how to run BLIS's highly parameterized and configurable test suite, as well as the included BLAS test drivers. * **[BLIS Typed API Reference](docs/BLISTypedAPI.md).** Here we document the so-called "typed" (or BLAS-like) API. This is the API that many users who are already familiar with the BLAS will likely want to use. * **[BLIS Object API Reference](docs/BLISObjectAPI.md).** Here we document the object API. This is API abstracts away properties of vectors and matrices within `obj_t` structs that can be queried with accessor functions. Many developers and experts prefer this API over the typed API. * **[Hardware Support](docs/HardwareSupport.md).** This document maintains a table of supported microarchitectures. * **[Multithreading](docs/Multithreading.md).** This document describes how to use the multithreading features of BLIS. * **[Mixed-Datatypes](docs/MixedDatatypes.md).** This document provides an overview of BLIS's mixed-datatype functionality and provides a brief example of how to take advantage of this new code. * **[Performance](docs/Performance.md).** This document reports empirically measured performance of a representative set of level-3 operations on a variety of hardware architectures, as implemented within BLIS and other BLAS libraries for all four of the standard floating-point datatypes. * **[PerformanceSmall](docs/PerformanceSmall.md).** This document reports empirically measured performance of `gemm` on select hardware architectures within BLIS and other BLAS libraries when performing matrix problems where one or two dimensions is exceedingly small. * **[Discord](docs/Discord.md).** This document describes how to: create an account on Discord (if you don't already have one); obtain a private invite link; and use that invite link to join our BLIS server on Discord. * **[Release Notes](docs/ReleaseNotes.md).** This document tracks a summary of changes included with each new version of BLIS, along with contributor credits for key features. * **[Frequently Asked Questions](docs/FAQ.md).** If you have general questions about BLIS, please read this FAQ. If you can't find the answer to your question, please feel free to join the [blis-devel](https://groups.google.com/group/blis-devel) mailing list and post a question. We also have a [blis-discuss](https://groups.google.com/group/blis-discuss) mailing list that anyone can post to (even without joining). **Documents for github contributors:** * **[Contributing bug reports, feature requests, PRs, etc](CONTRIBUTING.md).** Interested in contributing to BLIS? Please read this document before getting started. It provides a general overview of how best to report bugs, propose new features, and offer code patches. * **[Coding Conventions](docs/CodingConventions.md).** If you are interested or planning on contributing code to BLIS, please read this document so that you can format your code in accordance with BLIS's standards. **Documents for BLIS developers:** * **[Kernels Guide](docs/KernelsHowTo.md).** If you would like to learn more about the types of kernels that BLIS exposes, their semantics, the operations that each kernel accelerates, and various implementation issues, please read this guide. * **[Configuration Guide](docs/ConfigurationHowTo.md).** If you would like to learn how to add new sub-configurations or configuration families, or are simply interested in learning how BLIS organizes its configurations and kernel sets, please read this thorough walkthrough of the configuration system. * **[Addon Guide](docs/Addons.md).** If you are interested in learning about using BLIS addons--that is, enabling existing (or creating new) bundles of operation or API code that are built into a BLIS library--please read this document. * **[Sandbox Guide](docs/Sandboxes.md).** If you are interested in learning about using sandboxes in BLIS--that is, providing alternative implementations of the `gemm` operation--please read this document. Performance ----------- We provide graphs that report performance of several implementations across a range of hardware types, multithreading configurations, problem sizes, operations, and datatypes. These pages also document most of the details needed to reproduce these experiments. * **[Performance](docs/Performance.md).** This document reports empirically measured performance of a representative set of level-3 operations on a variety of hardware architectures, as implemented within BLIS and other BLAS libraries for all four of the standard floating-point datatypes. * **[PerformanceSmall](docs/PerformanceSmall.md).** This document reports empirically measured performance of `gemm` on select hardware architectures within BLIS and other BLAS libraries when performing matrix problems where one or two dimensions is exceedingly small. External Packages ----------------- Generally speaking, we **highly recommend** building from source whenever possible using the latest `git` clone. (Tarballs of each [tagged release](https://github.com/flame/blis/releases) are also available, but we consider them to be less ideal since they are not as easy to upgrade as `git` clones.) That said, some users may prefer binary and/or source packages through their Linux distribution. Thanks to generous involvement/contributions from our community members, the following BLIS packages are now available: * **Debian**. [M. Zhou](https://github.com/cdluminate) has volunteered to sponsor and maintain BLIS packages within the Debian Linux distribution. The Debian package tracker can be found [here](https://tracker.debian.org/pkg/blis). (Also, thanks to [Nico Schlömer](https://github.com/nschloe) for previously volunteering his time to set up a standalone PPA.) * **Gentoo**. [M. Zhou](https://github.com/cdluminate) also maintains the [BLIS package](https://packages.gentoo.org/packages/sci-libs/blis) entry for [Gentoo](https://www.gentoo.org/), a Linux distribution known for its source-based [portage](https://wiki.gentoo.org/wiki/Portage) package manager and distribution system. * **EPEL/Fedora**. There are official BLIS packages in Fedora and EPEL (for RHEL7+ and compatible distributions) with versions for 64-bit integers, OpenMP, and pthreads, and shims which can be dynamically linked instead of reference BLAS. (NOTE: For architectures other than intel64, amd64, and maybe arm64, the performance of packaged BLIS will be low because it uses unoptimized generic kernels; for those architectures, [OpenBLAS](https://github.com/xianyi/OpenBLAS) may be a better solution.) [Dave Love](https://github.com/loveshack) provides additional packages for EPEL6 in a [Fedora Copr](https://copr.fedorainfracloud.org/coprs/loveshack/blis/), and possibly versions more recent than the official repo for other EPEL/Fedora releases. The source packages may build on other rpm-based distributions. * **OpenSuSE**. The copr referred to above has rpms for some OpenSuSE releases; the source rpms may build for others. * **GNU Guix**. Guix has BLIS packages, provides builds only for the generic target and some specific `x86_64` micro-architectures. * **Conda**. conda channel [conda-forge](https://github.com/conda-forge/blis-feedstock) has Linux, OSX and Windows binary packages for `x86_64`. Discussion ---------- Most of the active discussions are now happening on our [Discord](https://discord.com/) server. Users and developers alike are welcome! Please see the [BLIS Discord guide](docs/Discord.md) for a walkthrough of how to join us. You can also still stay in touch by using either of the following mailing lists: * [blis-devel](https://groups.google.com/group/blis-devel): Please join and post to this mailing list if you are a BLIS developer, or if you are trying to use BLIS beyond simply linking to it as a BLAS library. * [blis-discuss](https://groups.google.com/group/blis-discuss): Please join and post to this mailing list if you have general questions or feedback regarding BLIS. Application developers (end users) may wish to post here, unless they have bug reports, in which case they should open a [new issue](https://github.com/flame/blis/issues) on github. Contributing ------------ For information on how to contribute to our project, including preferred [coding conventions](docs/CodingConventions.md), please refer to the [CONTRIBUTING](CONTRIBUTING.md) file at the top-level of the BLIS source distribution. Citations --------- For those of you looking for the appropriate article to cite regarding BLIS, we recommend citing our [first ACM TOMS journal paper](https://dl.acm.org/doi/10.1145/2764454?cid=81314495332) ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/blis1_toms_rev3.pdf)): ``` @article{BLIS1, author = {Field G. {V}an~{Z}ee and Robert A. {v}an~{d}e~{G}eijn}, title = {{BLIS}: A Framework for Rapidly Instantiating {BLAS} Functionality}, journal = {ACM Transactions on Mathematical Software}, volume = {41}, number = {3}, pages = {14:1--14:33}, month = {June}, year = {2015}, issue_date = {June 2015}, url = {https://doi.acm.org/10.1145/2764454}, } ``` You may also cite the [second ACM TOMS journal paper](https://dl.acm.org/doi/10.1145/2755561?cid=81314495332) ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/blis2_toms_rev3.pdf)): ``` @article{BLIS2, author = {Field G. {V}an~{Z}ee and Tyler Smith and Francisco D. Igual and Mikhail Smelyanskiy and Xianyi Zhang and Michael Kistler and Vernon Austel and John Gunnels and Tze Meng Low and Bryan Marker and Lee Killough and Robert A. {v}an~{d}e~{G}eijn}, title = {The {BLIS} Framework: Experiments in Portability}, journal = {ACM Transactions on Mathematical Software}, volume = {42}, number = {2}, pages = {12:1--12:19}, month = {June}, year = {2016}, issue_date = {June 2016}, url = {https://doi.acm.org/10.1145/2755561}, } ``` We also have a third paper, submitted to IPDPS 2014, on achieving [multithreaded parallelism in BLIS](https://dl.acm.org/doi/10.1109/IPDPS.2014.110) ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/blis3_ipdps14.pdf)): ``` @inproceedings{BLIS3, author = {Tyler M. Smith and Robert A. {v}an~{d}e~{G}eijn and Mikhail Smelyanskiy and Jeff R. Hammond and Field G. {V}an~{Z}ee}, title = {Anatomy of High-Performance Many-Threaded Matrix Multiplication}, booktitle = {28th IEEE International Parallel \& Distributed Processing Symposium (IPDPS 2014)}, year = {2014}, url = {https://doi.org/10.1109/IPDPS.2014.110}, } ``` A fourth paper, submitted to ACM TOMS, also exists, which proposes an [analytical model](https://dl.acm.org/doi/10.1145/2925987) for determining blocksize parameters in BLIS ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf)): ``` @article{BLIS4, author = {Tze Meng Low and Francisco D. Igual and Tyler M. Smith and Enrique S. Quintana-Ort\'{\i}}, title = {Analytical Modeling Is Enough for High-Performance {BLIS}}, journal = {ACM Transactions on Mathematical Software}, volume = {43}, number = {2}, pages = {12:1--12:18}, month = {August}, year = {2016}, issue_date = {August 2016}, url = {https://doi.acm.org/10.1145/2925987}, } ``` A fifth paper, submitted to ACM TOMS, begins the study of so-called [induced methods for complex matrix multiplication](https://dl.acm.org/doi/10.1145/3086466?cid=81314495332) ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/blis5_toms_rev2.pdf)): ``` @article{BLIS5, author = {Field G. {V}an~{Z}ee and Tyler Smith}, title = {Implementing High-performance Complex Matrix Multiplication via the 3m and 4m Methods}, journal = {ACM Transactions on Mathematical Software}, volume = {44}, number = {1}, pages = {7:1--7:36}, month = {July}, year = {2017}, issue_date = {July 2017}, url = {https://doi.acm.org/10.1145/3086466}, } ``` A sixth paper, submitted to ACM TOMS, revisits the topic of the previous article and derives a [superior induced method](https://epubs.siam.org/doi/10.1137/19M1282040) ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/blis6_sisc_rev3.pdf)): ``` @article{BLIS6, author = {Field G. {V}an~{Z}ee}, title = {Implementing High-Performance Complex Matrix Multiplication via the 1m Method}, journal = {SIAM Journal on Scientific Computing}, volume = {42}, number = {5}, pages = {C221--C244}, month = {September} year = {2020}, issue_date = {September 2020}, url = {https://doi.org/10.1137/19M1282040} } ``` A seventh paper, submitted to ACM TOMS, explores the implementation of `gemm` for [mixed-domain and/or mixed-precision](https://dl.acm.org/doi/10.1145/3402225?cid=81314495332) operands ([unofficial backup link](https://www.cs.utexas.edu/users/flame/pubs/blis7_toms_rev0.pdf)): ``` @article{BLIS7, author = {Field G. {V}an~{Z}ee and Devangi N. Parikh and Robert A. van~de~{G}eijn}, title = {Supporting Mixed-domain Mixed-precision Matrix Multiplication within the BLIS Framework}, journal = {ACM Transactions on Mathematical Software}, volume = {47}, number = {2}, pages = {12:1--12:26}, month = {April}, year = {2021}, issue_date = {April 2021}, url = {https://doi.org/10.1145/3402225}, } ``` Awards ------ * **[2023 James H. Wilkinson Prize for Numerical Software.](https://www.siam.org/prizes-recognition/major-prizes-lectures/detail/james-h-wilkinson-prize-for-numerical-software)** This prize is awarded once every four years to the authors of an outstanding piece of numerical software, or to individuals who have made an outstanding contribution to an existing piece of numerical software. The selection committee sought to recognize the recipients "for the development of [BLIS](https://github.com/flame/blis), a portable open-source software framework that facilitates rapid instantiation of high-performance BLAS and BLAS-like operations targeting modern CPUs." The prize will be awarded at the [2023 SIAM Conference on Computational Science and Engineering](https://www.siam.org/conferences/cm/conference/cse23) in Amsterdam. * **[2020 SIAM Activity Group on Supercomputing Best Paper Prize.](https://www.siam.org/prizes-recognition/activity-group-prizes/detail/siag-sc-best-paper-prize)** This prize is awarded once every two years to the authors of the most outstanding paper, as determined by the selection committee, in the field of parallel scientific and engineering computing published within the four calendar years preceding the award year. The prize was chosen for the paper ["The BLIS Framework: Experiments in Portability."](#citations) and awarded at the [2020 SIAM Conference on Parallel Processing for Scientific Computing](https://www.siam.org/conferences/cm/conference/pp20) in Seattle where Robert van de Geijn delivered [a talk on BLIS](https://meetings.siam.org/sess/dsp_programsess.cfm?SESSIONCODE=68266) and accepted the prize alongside other coauthors. See also: * [SIAM News | January 2020 Prize Spotlight](https://sinews.siam.org/Details-Page/january-2020-prize-spotlight#Field&Robert) * [Oden Institute's SHPC Group Win SIAM Best Paper Prize](https://www.oden.utexas.edu/about/news/ScienceHighPerfomanceComputingSIAMBestPaperPrize/) Funding ------- This project and its associated research were partially sponsored by grants from [Microsoft](https://www.microsoft.com/), [Intel](https://www.intel.com/), [Texas Instruments](https://www.ti.com/), [AMD](https://www.amd.com/), [HPE](https://www.hpe.com/), [Oracle](https://www.oracle.com/), [Huawei](https://www.huawei.com/), [Facebook](https://www.facebook.com/), and [ARM](https://www.arm.com/), as well as grants from the [National Science Foundation](https://www.nsf.gov/) (Awards CCF-0917167, ACI-1148125/1340293, CCF-1320112, and ACI-1550493). _Any opinions, findings and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the National Science Foundation (NSF)._ blis-1.1/RELEASING000066400000000000000000000026771474157777200136010ustar00rootroot00000000000000Here are the steps to follow to create a new release (version) of BLIS: 1. Make sure there are no commits that have yet to be pulled into local repository. $ git pull If there are any commits upstream, merge them as appropriate. 2. Consider whether the so_version should be updated (via the so_version file in the 'build' directory) due to any ABI changes since the previous version. If so, commit that change now. 3. Verify that the code builds properly. $ ./configure auto; make 4. Verify that the code passes BLIS and BLAS tests: $ make check # BLIS testsuite (fast) + BLAS test drivers $ make checkblis # BLIS testsuite (full ex. mixed-datatype) $ make checkblis-md # BLIS testsuite (mixed-datatype only) $ make checkblis-salt # BLIS testsuite (fast + salt) 5. Draft a new announcement to blis-devel, crediting those who contributed towards this version by browsing 'git log'. 6. Update CREDITS file if 'git log' reveals any new contributors. 7. Update docs/ReleaseNotes.md file with body of finalized announcement and the date of the release. 8. Commit changes from steps 5 and 6. 9. Bump the version number: $ ./build/bump-version.sh "0.3.2" This will result in two new commits: a version file update and a CHANGELOG file update. 10. Push the new commits and new tag associated with the new version: $ git push $ git push --tag 11. Send finalized announcement to blis-devel. blis-1.1/addon/000077500000000000000000000000001474157777200134165ustar00rootroot00000000000000blis-1.1/addon/old/000077500000000000000000000000001474157777200141745ustar00rootroot00000000000000blis-1.1/addon/old/gemmd/000077500000000000000000000000001474157777200152655ustar00rootroot00000000000000blis-1.1/addon/old/gemmd/attic/000077500000000000000000000000001474157777200163715ustar00rootroot00000000000000blis-1.1/addon/old/gemmd/attic/bao_gemmd_bp_var2.c000066400000000000000000000475231474157777200220750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemmd_fp typedef void (*FUNCPTR_T) ( conj_t conja, conj_t conjb, dim_t m, dim_t n, dim_t k, void* restrict alpha, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict d, inc_t incd, void* restrict b, inc_t rs_b, inc_t cs_b, void* restrict beta, void* restrict c, inc_t rs_c, inc_t cs_c, cntx_t* restrict cntx, rntm_t* restrict rntm, thrinfo_t* restrict thread ); // // -- gemmd-like block-panel algorithm (object interface) ---------------------- // // Define a function pointer array named ftypes and initialize its contents with // the addresses of the typed functions defined below, bao_?gemmd_bp_var2(). static FUNCPTR_T GENARRAY_PREF(ftypes,bao_,gemmd_bp_var2); void bao_gemmd_bp_var2 ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ) { const num_t dt = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( a ); void* restrict buf_a = bli_obj_buffer_at_off( a ); const inc_t rs_a = bli_obj_row_stride( a ); const inc_t cs_a = bli_obj_col_stride( a ); void* restrict buf_d = bli_obj_buffer_at_off( d ); const inc_t incd = bli_obj_vector_inc( d ); void* restrict buf_b = bli_obj_buffer_at_off( b ); const inc_t rs_b = bli_obj_row_stride( b ); const inc_t cs_b = bli_obj_col_stride( b ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt, beta ); // Index into the function pointer array to extract the correct // typed function pointer based on the chosen datatype. FUNCPTR_T f = ftypes[dt]; // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_d, incd, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } // // -- gemmd-like block-panel algorithm (typed interface) ----------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict d, inc_t incd, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ thrinfo_t* restrict thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for various blocksizes. */ \ const dim_t NR = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t NC = bli_cntx_get_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t MC = bli_cntx_get_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t KC = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); \ \ /* Query the context for the microkernel address and cast it to its function pointer type. */ \ /* PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ */ \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ /* ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool col_pref = bli_cntx_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ */ \ \ /* Compute partitioning step values for each matrix of each loop. */ \ const inc_t jcstep_c = cs_c; \ const inc_t jcstep_b = cs_b; \ \ const inc_t pcstep_a = cs_a; \ const inc_t pcstep_d = incd; \ const inc_t pcstep_b = rs_b; \ \ const inc_t icstep_c = rs_c; \ const inc_t icstep_a = rs_a; \ \ const inc_t jrstep_c = cs_c * NR; \ \ const inc_t irstep_c = rs_c * MR; \ \ ctype* restrict a_00 = a; \ ctype* restrict d_00 = d; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ /* Make local copies of the scalars to prevent any unnecessary sharing of cache lines between the cores' caches. */ \ ctype alpha_local = *alpha_cast; \ ctype beta_local = *beta_cast; \ ctype one_local = *PASTEMAC(ch,1); \ /*ctype zero_local = *PASTEMAC(ch,0);*/ \ \ auxinfo_t aux; \ \ /* Initialize a mem_t entry for A and B. Strictly speaking, this is only needed for the matrix we will be packing (if any), but we do it unconditionally to be safe. */ \ mem_t mem_a = BLIS_MEM_INITIALIZER; \ mem_t mem_b = BLIS_MEM_INITIALIZER; \ \ /* Define an array of bszid_t ids, which will act as our substitute for the cntl_t tree. */ \ bszid_t bszids[8] = { BLIS_NC, /* 5th loop */ \ BLIS_KC, /* 4th loop */ \ BLIS_NO_PART, /* pack B */ \ BLIS_MC, /* 3rd loop */ \ BLIS_NO_PART, /* pack A */ \ BLIS_NR, /* 2nd loop */ \ BLIS_MR, /* 1st loop */ \ BLIS_KR }; /* microkernel loop */ \ \ bszid_t* restrict bszids_jc = &bszids[0]; \ bszid_t* restrict bszids_pc = &bszids[1]; \ /*bszid_t* restrict bszids_pb = &bszids[2];*/ \ bszid_t* restrict bszids_ic = &bszids[3]; \ /*bszid_t* restrict bszids_pa = &bszids[4];*/ \ bszid_t* restrict bszids_jr = &bszids[5]; \ /*bszid_t* restrict bszids_ir = &bszids[6];*/ \ \ thrinfo_t* restrict thread_jc = NULL; \ thrinfo_t* restrict thread_pc = NULL; \ thrinfo_t* restrict thread_pb = NULL; \ thrinfo_t* restrict thread_ic = NULL; \ thrinfo_t* restrict thread_pa = NULL; \ thrinfo_t* restrict thread_jr = NULL; \ thrinfo_t* restrict thread_ir = NULL; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_jc = thread; \ bli_thrinfo_sup_grow( rntm, bszids_jc, thread_jc ); \ \ /* Compute the JC loop thread range for the current thread. */ \ dim_t jc_start, jc_end; \ bli_thread_range_sub( thread_jc, n, NR, FALSE, &jc_start, &jc_end ); \ const dim_t n_local = jc_end - jc_start; \ \ /* Compute number of primary and leftover components of the JC loop. */ \ /*const dim_t jc_iter = ( n_local + NC - 1 ) / NC;*/ \ const dim_t jc_left = n_local % NC; \ \ /* Loop over the n dimension (NC rows/columns at a time). */ \ for ( dim_t jj = jc_start; jj < jc_end; jj += NC ) \ { \ /* Calculate the thread's current JC block dimension. */ \ const dim_t nc_cur = ( NC <= jc_end - jj ? NC : jc_left ); \ \ ctype* restrict b_jc = b_00 + jj * jcstep_b; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_pc = bli_thrinfo_sub_node( thread_jc ); \ bli_thrinfo_sup_grow( rntm, bszids_pc, thread_pc ); \ \ /* Compute the PC loop thread range for the current thread. */ \ const dim_t pc_start = 0, pc_end = k; \ const dim_t k_local = k; \ \ /* Compute number of primary and leftover components of the PC loop. */ \ /*const dim_t pc_iter = ( k_local + KC - 1 ) / KC;*/ \ const dim_t pc_left = k_local % KC; \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = pc_start; pp < pc_end; pp += KC ) \ { \ /* Calculate the thread's current PC block dimension. */ \ const dim_t kc_cur = ( KC <= pc_end - pp ? KC : pc_left ); \ \ ctype* restrict a_pc = a_00 + pp * pcstep_a; \ ctype* restrict d_pc = d_00 + pp * pcstep_d; \ ctype* restrict b_pc = b_jc + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? &beta_local : &one_local ); \ \ ctype* b_use; \ inc_t rs_b_use, cs_b_use, ps_b_use; \ \ /* Identify the current thrinfo_t node. Note that the thrinfo_t node will have already been created by a previous call to bli_thrinfo_sup_grow() since bszid_t values of BLIS_NO_PART cause the tree to grow by two (e.g. to the next bszid that is a normal bszid_t value). */ \ thread_pb = bli_thrinfo_sub_node( thread_pc ); \ /*bli_thrinfo_sup_grow( rntm, bszids_pb, thread_pb );*/ \ \ /* Determine the packing buffer and related parameters for matrix B. Then call the packm implementation. */ \ PASTECH2(bao_,ch,packm_b) \ ( \ conjb, \ KC, NC, \ kc_cur, nc_cur, NR, \ &one_local, \ d_pc, incd, \ b_pc, rs_b, cs_b, \ &b_use, &rs_b_use, &cs_b_use, \ &ps_b_use, \ cntx, \ rntm, \ &mem_b, \ thread_pb \ ); \ \ /* Alias b_use so that it's clear this is our current block of matrix B. */ \ ctype* restrict b_pc_use = b_use; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_ic = bli_thrinfo_sub_node( thread_pb ); \ bli_thrinfo_sup_grow( rntm, bszids_ic, thread_ic ); \ \ /* Compute the IC loop thread range for the current thread. */ \ dim_t ic_start, ic_end; \ bli_thread_range_sub( thread_ic, m, MR, FALSE, &ic_start, &ic_end ); \ const dim_t m_local = ic_end - ic_start; \ \ /* Compute number of primary and leftover components of the IC loop. */ \ /*const dim_t ic_iter = ( m_local + MC - 1 ) / MC;*/ \ const dim_t ic_left = m_local % MC; \ \ /* Loop over the m dimension (MC rows at a time). */ \ for ( dim_t ii = ic_start; ii < ic_end; ii += MC ) \ { \ /* Calculate the thread's current IC block dimension. */ \ const dim_t mc_cur = ( MC <= ic_end - ii ? MC : ic_left ); \ \ ctype* restrict a_ic = a_pc + ii * icstep_a; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ ctype* a_use; \ inc_t rs_a_use, cs_a_use, ps_a_use; \ \ /* Identify the current thrinfo_t node. Note that the thrinfo_t node will have already been created by a previous call to bli_thrinfo_sup_grow() since bszid_t values of BLIS_NO_PART cause the tree to grow by two (e.g. to the next bszid that is a normal bszid_t value). */ \ thread_pa = bli_thrinfo_sub_node( thread_ic ); \ /*bli_thrinfo_sup_grow( rntm, bszids_pa, thread_pa );*/ \ \ /* Determine the packing buffer and related parameters for matrix A. Then call the packm implementation. */ \ PASTECH2(bao_,ch,packm_a) \ ( \ conja, \ MC, KC, \ mc_cur, kc_cur, MR, \ &one_local, \ d_pc, incd, \ a_ic, rs_a, cs_a, \ &a_use, &rs_a_use, &cs_a_use, \ &ps_a_use, \ cntx, \ rntm, \ &mem_a, \ thread_pa \ ); \ \ /* Alias a_use so that it's clear this is our current block of matrix A. */ \ ctype* restrict a_ic_use = a_use; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_jr = bli_thrinfo_sub_node( thread_pa ); \ bli_thrinfo_sup_grow( rntm, bszids_jr, thread_jr ); \ \ /* Query the number of threads and thread ids for the JR loop. NOTE: These values are only needed when computing the next micropanel of B. */ \ const dim_t jr_nt = bli_thrinfo_n_way( thread_jr ); \ const dim_t jr_tid = bli_thrinfo_work_id( thread_jr ); \ \ /* Compute number of primary and leftover components of the JR loop. */ \ dim_t jr_iter = ( nc_cur + NR - 1 ) / NR; \ dim_t jr_left = nc_cur % NR; \ \ /* Compute the JR loop thread range for the current thread. */ \ dim_t jr_start, jr_end; \ bli_thread_range_sub( thread_jr, jr_iter, 1, FALSE, &jr_start, &jr_end ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( dim_t j = jr_start; j < jr_end; j += 1 ) \ { \ const dim_t nr_cur \ = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? NR : jr_left ); \ \ ctype* restrict b_jr = b_pc_use + j * ps_b_use; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* Assume for now that our next panel of B to be the current panel of B. */ \ ctype* restrict b2 = b_jr; \ \ /* Identify the current thrinfo_t node. */ \ thread_ir = bli_thrinfo_sub_node( thread_jr ); \ \ /* Query the number of threads and thread ids for the IR loop. NOTE: These values are only needed when computing the next micropanel of A. */ \ const dim_t ir_nt = bli_thrinfo_n_way( thread_ir ); \ const dim_t ir_tid = bli_thrinfo_work_id( thread_ir ); \ \ /* Compute number of primary and leftover components of the IR loop. */ \ dim_t ir_iter = ( mc_cur + MR - 1 ) / MR; \ dim_t ir_left = mc_cur % MR; \ \ /* Compute the IR loop thread range for the current thread. */ \ dim_t ir_start, ir_end; \ bli_thread_range_sub( thread_ir, ir_iter, 1, FALSE, &ir_start, &ir_end ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( dim_t i = ir_start; i < ir_end; i += 1 ) \ { \ const dim_t mr_cur \ = ( bli_is_not_edge_f( i, ir_iter, ir_left ) ? MR : ir_left ); \ \ ctype* restrict a_ir = a_ic_use + i * ps_a_use; \ ctype* restrict c_ir = c_jr + i * irstep_c; \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next micropanels of A and B. */ \ a2 = bli_gemm_get_next_a_upanel( a_ir, ps_a_use, 1 ); \ if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) ) \ { \ a2 = a_ic_use; \ b2 = bli_gemm_get_next_b_upanel( b_jr, ps_b_use, 1 ); \ if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) ) \ b2 = b_pc_use; \ } \ \ /* Save the addresses of next micropanels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Call a wrapper to the kernel (which handles edge cases). */ \ PASTECH2(bao_,ch,gemm_kernel) \ ( \ MR, \ NR, \ mr_cur, \ nr_cur, \ kc_cur, \ &alpha_local, \ a_ir, rs_a_use, cs_a_use, \ b_jr, rs_b_use, cs_b_use, \ beta_use, \ c_ir, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ \ /* This barrier is needed to prevent threads from starting to pack the next row panel of B before the current row panel is fully computed upon. */ \ bli_thrinfo_barrier( thread_pb ); \ } \ } \ \ /* Release any memory that was acquired for packing matrices A and B. */ \ PASTECH2(bao_,ch,packm_finalize_mem_a) \ ( \ rntm, \ &mem_a, \ thread_pa \ ); \ PASTECH2(bao_,ch,packm_finalize_mem_b) \ ( \ rntm, \ &mem_b, \ thread_pb \ ); \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmd_bp_var2: a1_packed", mr_cur, kc_cur, a_ir, rs_a_use, cs_a_use, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmd_bp_var2: b1_packed", kc_cur, nr_cur, b_jr, rs_b_use, cs_b_use, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmd_bp_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%5.2f", "" ); \ */ \ } //INSERT_GENTFUNC_BASIC0( gemmd_bp_var2 ) GENTFUNC( float, s, gemmd_bp_var2 ) GENTFUNC( double, d, gemmd_bp_var2 ) GENTFUNC( scomplex, c, gemmd_bp_var2 ) GENTFUNC( dcomplex, z, gemmd_bp_var2 ) // // -- gemm-like microkernel wrapper -------------------------------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ const dim_t MR, \ const dim_t NR, \ dim_t mr_cur, \ dim_t nr_cur, \ dim_t kc_cur, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict aux, \ cntx_t* restrict cntx \ ) \ { \ /* Infer the datatype from the ctype. */ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for the microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool col_pref = bli_cntx_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype zero = *PASTEMAC(ch,0); \ \ /* Handle interior and edge cases separately. */ \ if ( mr_cur == MR && nr_cur == NR ) \ { \ /* Invoke the gemm microkernel. */ \ gemm_ukr \ ( \ kc_cur, \ alpha, \ a, \ b, \ beta, \ c, rs_c, cs_c, \ aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm microkernel. */ \ gemm_ukr \ ( \ kc_cur, \ alpha, \ a, \ b, \ &zero, \ ct, rs_ct, cs_ct, \ aux, \ cntx \ ); \ \ /* Scale the bottom edge of C and add the result from above. */ \ PASTEMAC(ch,xpbys_mxn) \ ( \ mr_cur, \ nr_cur, \ ct, rs_ct, cs_ct, \ beta, \ c, rs_c, cs_c \ ); \ } \ } //INSERT_GENTFUNC_BASIC0( gemm_kernel ) GENTFUNC( float, s, gemm_kernel ) GENTFUNC( double, d, gemm_kernel ) GENTFUNC( scomplex, c, gemm_kernel ) GENTFUNC( dcomplex, z, gemm_kernel ) blis-1.1/addon/old/gemmd/attic/bli_gemm_ex.c000066400000000000000000000061141474157777200210060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_gemm_ex ( const obj_t* alpha, const obj_t* a, const obj_t* b, const obj_t* beta, const obj_t* c, const cntx_t* cntx, rntm_t* rntm ) { bli_init_once(); // A switch to easily toggle whether we use the addon implementation // of bao_gemmd() as the implementation for bli_gemm(). (This allows for // easy testing of bao_gemmd() via the testsuite.) if ( 1 ) { const dim_t k = bli_obj_width_after_trans( a ); const num_t dt = bli_obj_dt( c ); obj_t d; bli_obj_create( dt, k, 1, 1, k, &d ); bli_setv( &BLIS_ONE, &d ); //bli_randv( &d ); bao_gemmd_ex( alpha, a, &d, b, beta, c, cntx, rntm ); bli_obj_free( &d ); return; } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } else { rntm_l = *rntm; rntm = &rntm_l; } // Obtain a valid (native) context from the gks if necessary. if ( cntx == NULL ) cntx = bli_gks_query_cntx(); // Check the operands. if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c, cntx ); // Invoke the operation's front end. bli_gemm_front ( ( obj_t* )alpha, ( obj_t* )a, ( obj_t* )b, ( obj_t* )beta, ( obj_t* )c, ( cntx_t* )cntx, ( rntm_t* )rntm, NULL ); } blis-1.1/addon/old/gemmd/bao_gemmd.c000066400000000000000000000211071474157777200173440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // -- Define the gemmd operation's object API ---------------------------------- // void bao_gemmd ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c ) { bao_gemmd_ex ( alpha, a, d, b, beta, c, NULL, NULL ); } void bao_gemmd_ex ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { bli_init_once(); // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } else { rntm_l = *rntm; rntm = &rntm_l; } // Set the .pack_a and .pack_b fields to TRUE. This is only needed because // this addon uses bli_thrinfo_sup_grow(), which calls // bli_thrinfo_sup_create_for_cntl(), which employs an optimization if // both fields are FALSE (as is often the case with sup). However, this // addon implements the "large" code path, and so both A and B must // always be packed. Setting the fields to TRUE will avoid the optimization // while this addon implementation executes (and it also reinforces the // fact that we *are* indeed packing A and B, albeit not in the sup context // originally envisioned for the .pack_a and .pack_b fields). bli_rntm_set_pack_a( TRUE, rntm ); bli_rntm_set_pack_b( TRUE, rntm ); // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. if ( cntx == NULL ) cntx = ( cntx_t* )bli_gks_query_cntx(); // Check parameters. if ( bli_error_checking_is_enabled() ) bao_gemmd_check( alpha, a, d, b, beta, c, cntx ); // -- bao_gemmd_front() ---------------------------------------------------- obj_t a_local; obj_t b_local; obj_t c_local; // If C has a zero dimension, return early. if ( bli_obj_has_zero_dim( c ) ) { return; } // If alpha is zero, or if A or B has a zero dimension, scale C by beta // and return early. if ( bli_obj_equals( alpha, &BLIS_ZERO ) || bli_obj_has_zero_dim( a ) || bli_obj_has_zero_dim( b ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // Induce a transposition of A if it has its transposition property set. // Then clear the transposition bit in the object. if ( bli_obj_has_trans( &a_local ) ) { bli_obj_induce_trans( &a_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local ); } // Induce a transposition of B if it has its transposition property set. // Then clear the transposition bit in the object. if ( bli_obj_has_trans( &b_local ) ) { bli_obj_induce_trans( &b_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &b_local ); } // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( bli_cntx_dislikes_storage_of( &c_local, BLIS_GEMM_VIR_UKR, cntx ) ) { bli_obj_swap( &a_local, &b_local ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_GEMM, BLIS_LEFT, // ignored for gemm/hemm/symm bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // Spawn threads (if applicable), where bao_gemmd_int() is the thread entry // point function for each thread. This also begins the process of creating // the thrinfo_t tree, which contains thread communicators. bao_l3_thread_decorator ( bao_gemmd_int, BLIS_GEMM, // operation family id alpha, &a_local, d, &b_local, beta, &c_local, cntx, rntm ); } // // -- Define the gemmd operation's thread entry point -------------------------- // void bao_gemmd_int ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ) { // In this function, we choose the gemmd implementation that is executed // on each thread. // Call the block-panel algorithm. bao_gemmd_bp_var1 ( alpha, a, d, b, beta, c, cntx, rntm, thread ); } // // -- Define the gemmd operation's typed API ----------------------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* d, inc_t incd, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ ) \ { \ bli_init_once(); \ \ /* Determine the datatype (e.g. BLIS_FLOAT, BLIS_DOUBLE, etc.) based on the macro parameter 'ch' (e.g. s, d, etc). */ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, dd, bo, betao, co; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ /* Adjust the dimensions of matrices A and B according to the transa and transb parameters. */ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, k, n, &m_b, &n_b ); \ \ /* Create bufferless scalar objects and attach the provided scalar pointers to those scalar objects. */ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ /* Create bufferless matrix objects and attach the provided matrix pointers to those matrix objects. */ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, k, 1, d, incd, k, &dd ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ \ /* Set the transposition/conjugation properties of the objects for matrices A and B. */ \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ /* Call the object interface. */ \ PASTECH(bao_,opname) \ ( \ &alphao, \ &ao, \ &dd, \ &bo, \ &betao, \ &co \ ); \ } //INSERT_GENTFUNC_BASIC0( gemmd ) GENTFUNC( float, s, gemmd ) GENTFUNC( double, d, gemmd ) GENTFUNC( scomplex, c, gemmd ) GENTFUNC( dcomplex, z, gemmd ) blis-1.1/addon/old/gemmd/bao_gemmd.h000066400000000000000000000060741474157777200173570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // -- Prototype the gemmd operation's object API ------------------------------- // BLIS_EXPORT_ADDON void bao_gemmd ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c ); BLIS_EXPORT_ADDON void bao_gemmd_ex ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); // // -- Prototype the gemmd operation's thread entry point ----------------------- // void bao_gemmd_int ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); // // -- Prototype the gemmd operation's typed API -------------------------------- // #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_ADDON void PASTECH2(bao_,ch,opname) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* d, inc_t incd, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ ); //INSERT_GENTPROT_BASIC0( gemmd ) GENTPROT( float, s, gemmd ) GENTPROT( double, d, gemmd ) GENTPROT( scomplex, c, gemmd ) GENTPROT( dcomplex, z, gemmd ) blis-1.1/addon/old/gemmd/bao_gemmd_bp_var1.c000066400000000000000000000413161474157777200207620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemmd_fp typedef void (*FUNCPTR_T) ( conj_t conja, conj_t conjb, dim_t m, dim_t n, dim_t k, void* restrict alpha, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict d, inc_t incd, void* restrict b, inc_t rs_b, inc_t cs_b, void* restrict beta, void* restrict c, inc_t rs_c, inc_t cs_c, cntx_t* restrict cntx, rntm_t* restrict rntm, thrinfo_t* restrict thread ); // // -- gemmd-like block-panel algorithm (object interface) ---------------------- // // Define a function pointer array named ftypes and initialize its contents with // the addresses of the typed functions defined below, bao_?gemmd_bp_var1(). static FUNCPTR_T GENARRAY_PREF(ftypes,bao_,gemmd_bp_var1); void bao_gemmd_bp_var1 ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ) { const num_t dt = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( a ); void* restrict buf_a = bli_obj_buffer_at_off( a ); const inc_t rs_a = bli_obj_row_stride( a ); const inc_t cs_a = bli_obj_col_stride( a ); void* restrict buf_d = bli_obj_buffer_at_off( d ); const inc_t incd = bli_obj_vector_inc( d ); void* restrict buf_b = bli_obj_buffer_at_off( b ); const inc_t rs_b = bli_obj_row_stride( b ); const inc_t cs_b = bli_obj_col_stride( b ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt, beta ); // Index into the function pointer array to extract the correct // typed function pointer based on the chosen datatype. FUNCPTR_T f = ftypes[dt]; // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_d, incd, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } // // -- gemmd-like block-panel algorithm (typed interface) ----------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict d, inc_t incd, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ thrinfo_t* restrict thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for various blocksizes. */ \ const dim_t NR = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t NC = bli_cntx_get_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t MC = bli_cntx_get_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t KC = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); \ \ /* Query the context for the microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Compute partitioning step values for each matrix of each loop. */ \ const inc_t jcstep_c = cs_c; \ const inc_t jcstep_b = cs_b; \ \ const inc_t pcstep_a = cs_a; \ const inc_t pcstep_d = incd; \ const inc_t pcstep_b = rs_b; \ \ const inc_t icstep_c = rs_c; \ const inc_t icstep_a = rs_a; \ \ const inc_t jrstep_c = cs_c * NR; \ \ const inc_t irstep_c = rs_c * MR; \ \ ctype* restrict a_00 = a; \ ctype* restrict d_00 = d; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ /* Make local copies of the scalars to prevent any unnecessary sharing of cache lines between the cores' caches. */ \ ctype alpha_local = *alpha_cast; \ ctype beta_local = *beta_cast; \ ctype one_local = *PASTEMAC(ch,1); \ \ auxinfo_t aux; \ \ /* Initialize a mem_t entry for A and B. Strictly speaking, this is only needed for the matrix we will be packing (if any), but we do it unconditionally to be safe. */ \ mem_t mem_a = BLIS_MEM_INITIALIZER; \ mem_t mem_b = BLIS_MEM_INITIALIZER; \ \ /* Define an array of bszid_t ids, which will act as our substitute for the cntl_t tree. */ \ bszid_t bszids[8] = { BLIS_NC, /* 5th loop */ \ BLIS_KC, /* 4th loop */ \ BLIS_NO_PART, /* pack B */ \ BLIS_MC, /* 3rd loop */ \ BLIS_NO_PART, /* pack A */ \ BLIS_NR, /* 2nd loop */ \ BLIS_MR, /* 1st loop */ \ BLIS_KR }; /* microkernel loop */ \ \ bszid_t* restrict bszids_jc = &bszids[0]; \ bszid_t* restrict bszids_pc = &bszids[1]; \ /*bszid_t* restrict bszids_pb = &bszids[2];*/ \ bszid_t* restrict bszids_ic = &bszids[3]; \ /*bszid_t* restrict bszids_pa = &bszids[4];*/ \ bszid_t* restrict bszids_jr = &bszids[5]; \ /*bszid_t* restrict bszids_ir = &bszids[6];*/ \ \ thrinfo_t* restrict thread_jc = NULL; \ thrinfo_t* restrict thread_pc = NULL; \ thrinfo_t* restrict thread_pb = NULL; \ thrinfo_t* restrict thread_ic = NULL; \ thrinfo_t* restrict thread_pa = NULL; \ thrinfo_t* restrict thread_jr = NULL; \ thrinfo_t* restrict thread_ir = NULL; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_jc = thread; \ bli_thrinfo_sup_grow( rntm, bszids_jc, thread_jc ); \ \ /* Compute the JC loop thread range for the current thread. */ \ dim_t jc_start, jc_end; \ bli_thread_range_sub( thread_jc, n, NR, FALSE, &jc_start, &jc_end ); \ const dim_t n_local = jc_end - jc_start; \ \ /* Compute number of primary and leftover components of the JC loop. */ \ /*const dim_t jc_iter = ( n_local + NC - 1 ) / NC;*/ \ const dim_t jc_left = n_local % NC; \ \ /* Loop over the n dimension (NC rows/columns at a time). */ \ for ( dim_t jj = jc_start; jj < jc_end; jj += NC ) \ { \ /* Calculate the thread's current JC block dimension. */ \ const dim_t nc_cur = ( NC <= jc_end - jj ? NC : jc_left ); \ \ ctype* restrict b_jc = b_00 + jj * jcstep_b; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_pc = bli_thrinfo_sub_node( thread_jc ); \ bli_thrinfo_sup_grow( rntm, bszids_pc, thread_pc ); \ \ /* Compute the PC loop thread range for the current thread. */ \ const dim_t pc_start = 0, pc_end = k; \ const dim_t k_local = k; \ \ /* Compute number of primary and leftover components of the PC loop. */ \ /*const dim_t pc_iter = ( k_local + KC - 1 ) / KC;*/ \ const dim_t pc_left = k_local % KC; \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = pc_start; pp < pc_end; pp += KC ) \ { \ /* Calculate the thread's current PC block dimension. */ \ const dim_t kc_cur = ( KC <= pc_end - pp ? KC : pc_left ); \ \ ctype* restrict a_pc = a_00 + pp * pcstep_a; \ ctype* restrict d_pc = d_00 + pp * pcstep_d; \ ctype* restrict b_pc = b_jc + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? &beta_local : &one_local ); \ \ ctype* b_use; \ inc_t rs_b_use, cs_b_use, ps_b_use; \ \ /* Identify the current thrinfo_t node. Note that the thrinfo_t node will have already been created by a previous call to bli_thrinfo_sup_grow() since bszid_t values of BLIS_NO_PART cause the tree to grow by two (e.g. to the next bszid that is a normal bszid_t value). */ \ thread_pb = bli_thrinfo_sub_node( thread_pc ); \ /*bli_thrinfo_sup_grow( rntm, bszids_pb, thread_pb );*/ \ \ /* Determine the packing buffer and related parameters for matrix B. Then call the packm implementation. */ \ PASTECH2(bao_,ch,packm_b) \ ( \ conjb, \ KC, NC, \ kc_cur, nc_cur, NR, \ &one_local, \ d_pc, incd, \ b_pc, rs_b, cs_b, \ &b_use, &rs_b_use, &cs_b_use, \ &ps_b_use, \ cntx, \ rntm, \ &mem_b, \ thread_pb \ ); \ \ /* Alias b_use so that it's clear this is our current block of matrix B. */ \ ctype* restrict b_pc_use = b_use; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_ic = bli_thrinfo_sub_node( thread_pb ); \ bli_thrinfo_sup_grow( rntm, bszids_ic, thread_ic ); \ \ /* Compute the IC loop thread range for the current thread. */ \ dim_t ic_start, ic_end; \ bli_thread_range_sub( thread_ic, m, MR, FALSE, &ic_start, &ic_end ); \ const dim_t m_local = ic_end - ic_start; \ \ /* Compute number of primary and leftover components of the IC loop. */ \ /*const dim_t ic_iter = ( m_local + MC - 1 ) / MC;*/ \ const dim_t ic_left = m_local % MC; \ \ /* Loop over the m dimension (MC rows at a time). */ \ for ( dim_t ii = ic_start; ii < ic_end; ii += MC ) \ { \ /* Calculate the thread's current IC block dimension. */ \ const dim_t mc_cur = ( MC <= ic_end - ii ? MC : ic_left ); \ \ ctype* restrict a_ic = a_pc + ii * icstep_a; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ ctype* a_use; \ inc_t rs_a_use, cs_a_use, ps_a_use; \ \ /* Identify the current thrinfo_t node. Note that the thrinfo_t node will have already been created by a previous call to bli_thrinfo_sup_grow() since bszid_t values of BLIS_NO_PART cause the tree to grow by two (e.g. to the next bszid that is a normal bszid_t value). */ \ thread_pa = bli_thrinfo_sub_node( thread_ic ); \ /*bli_thrinfo_sup_grow( rntm, bszids_pa, thread_pa );*/ \ \ /* Determine the packing buffer and related parameters for matrix A. Then call the packm implementation. */ \ PASTECH2(bao_,ch,packm_a) \ ( \ conja, \ MC, KC, \ mc_cur, kc_cur, MR, \ &one_local, \ d_pc, incd, \ a_ic, rs_a, cs_a, \ &a_use, &rs_a_use, &cs_a_use, \ &ps_a_use, \ cntx, \ rntm, \ &mem_a, \ thread_pa \ ); \ \ /* Alias a_use so that it's clear this is our current block of matrix A. */ \ ctype* restrict a_ic_use = a_use; \ \ /* Identify the current thrinfo_t node and then grow the tree. */ \ thread_jr = bli_thrinfo_sub_node( thread_pa ); \ bli_thrinfo_sup_grow( rntm, bszids_jr, thread_jr ); \ \ /* Query the number of threads and thread ids for the JR loop. NOTE: These values are only needed when computing the next micropanel of B. */ \ const dim_t jr_nt = bli_thrinfo_n_way( thread_jr ); \ const dim_t jr_tid = bli_thrinfo_work_id( thread_jr ); \ \ /* Compute number of primary and leftover components of the JR loop. */ \ dim_t jr_iter = ( nc_cur + NR - 1 ) / NR; \ dim_t jr_left = nc_cur % NR; \ \ /* Compute the JR loop thread range for the current thread. */ \ dim_t jr_start, jr_end; \ bli_thread_range_sub( thread_jr, jr_iter, 1, FALSE, &jr_start, &jr_end ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( dim_t j = jr_start; j < jr_end; j += 1 ) \ { \ const dim_t nr_cur \ = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? NR : jr_left ); \ \ ctype* restrict b_jr = b_pc_use + j * ps_b_use; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* Assume for now that our next panel of B to be the current panel of B. */ \ ctype* restrict b2 = b_jr; \ \ /* Identify the current thrinfo_t node. */ \ thread_ir = bli_thrinfo_sub_node( thread_jr ); \ \ /* Query the number of threads and thread ids for the IR loop. NOTE: These values are only needed when computing the next micropanel of A. */ \ const dim_t ir_nt = bli_thrinfo_n_way( thread_ir ); \ const dim_t ir_tid = bli_thrinfo_work_id( thread_ir ); \ \ /* Compute number of primary and leftover components of the IR loop. */ \ dim_t ir_iter = ( mc_cur + MR - 1 ) / MR; \ dim_t ir_left = mc_cur % MR; \ \ /* Compute the IR loop thread range for the current thread. */ \ dim_t ir_start, ir_end; \ bli_thread_range_sub( thread_ir, ir_iter, 1, FALSE, &ir_start, &ir_end ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( dim_t i = ir_start; i < ir_end; i += 1 ) \ { \ const dim_t mr_cur \ = ( bli_is_not_edge_f( i, ir_iter, ir_left ) ? MR : ir_left ); \ \ ctype* restrict a_ir = a_ic_use + i * ps_a_use; \ ctype* restrict c_ir = c_jr + i * irstep_c; \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next micropanels of A and B. */ \ a2 = bli_gemm_get_next_a_upanel( a_ir, ps_a_use, 1 ); \ if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) ) \ { \ a2 = a_ic_use; \ b2 = bli_gemm_get_next_b_upanel( b_jr, ps_b_use, 1 ); \ if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) ) \ b2 = b_pc_use; \ } \ \ /* Save the addresses of next micropanels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Invoke the gemm microkernel. */ \ gemm_ukr \ ( \ mr_cur, \ nr_cur, \ kc_cur, \ &alpha_local, \ a_ir, \ b_jr, \ beta_use, \ c_ir, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ \ /* This barrier is needed to prevent threads from starting to pack the next row panel of B before the current row panel is fully computed upon. */ \ bli_thrinfo_barrier( thread_pb ); \ } \ } \ \ /* Release any memory that was acquired for packing matrices A and B. */ \ PASTECH2(bao_,ch,packm_finalize_mem_a) \ ( \ rntm, \ &mem_a, \ thread_pa \ ); \ PASTECH2(bao_,ch,packm_finalize_mem_b) \ ( \ rntm, \ &mem_b, \ thread_pb \ ); \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmd_bp_var1: a1_packed", mr_cur, kc_cur, a_ir, rs_a_use, cs_a_use, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmd_bp_var1: b1_packed", kc_cur, nr_cur, b_jr, rs_b_use, cs_b_use, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmd_bp_var1: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%5.2f", "" ); \ */ \ } //INSERT_GENTFUNC_BASIC0( gemmd_bp_var1 ) GENTFUNC( float, s, gemmd_bp_var1 ) GENTFUNC( double, d, gemmd_bp_var1 ) GENTFUNC( scomplex, c, gemmd_bp_var1 ) GENTFUNC( dcomplex, z, gemmd_bp_var1 ) blis-1.1/addon/old/gemmd/bao_gemmd_check.c000066400000000000000000000075301474157777200205050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bao_gemmd_check ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( d ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( b ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( c ); bli_check_error_code( e_val ); // Check scalar/vector/matrix type. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( d ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( b ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( c ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( d ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( b ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( beta ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( c ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_level3_dims( a, b, c ); bli_check_error_code( e_val ); e_val = bli_check_vector_dim_equals( d, bli_obj_width_after_trans( a ) ); bli_check_error_code( e_val ); // Check for consistent datatypes. // NOTE: We only perform these tests when mixed datatype support is // disabled. e_val = bli_check_consistent_object_datatypes( c, a ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( c, d ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( c, b ); bli_check_error_code( e_val ); } blis-1.1/addon/old/gemmd/bao_gemmd_check.h000066400000000000000000000035471474157777200205160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based check functions. // void bao_gemmd_check ( obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx ); blis-1.1/addon/old/gemmd/bao_gemmd_var.h000066400000000000000000000072601474157777200202250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype the object-based variant interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTECH(bao_,opname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* d, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ); GENPROT( gemmd_bp_var1 ) // // Prototype the typed variant interfaces. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict d, inc_t incd, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ thrinfo_t* restrict thread \ ); //INSERT_GENTPROT_BASIC0( gemmd_bp_var1 ) GENTPROT( float, s, gemmd_bp_var1 ) GENTPROT( double, d, gemmd_bp_var1 ) GENTPROT( scomplex, c, gemmd_bp_var1 ) GENTPROT( dcomplex, z, gemmd_bp_var1 ) // // Prototype the typed kernel interfaces. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ const dim_t MR, \ const dim_t NR, \ dim_t mr_cur, \ dim_t nr_cur, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict aux, \ cntx_t* restrict cntx \ ); //INSERT_GENTPROT_BASIC0( gemm_kernel ) GENTPROT( float, s, gemm_kernel ) GENTPROT( double, d, gemm_kernel ) GENTPROT( scomplex, c, gemm_kernel ) GENTPROT( dcomplex, z, gemm_kernel ) blis-1.1/addon/old/gemmd/bao_l3_packm_a.c000066400000000000000000000241111474157777200202420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ dim_t m, \ dim_t k, \ dim_t mr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ /* Set the pack buffer type so that we are obtaining memory blocks from the pool dedicated to blocks of A. */ \ const packbuf_t pack_buf_type = BLIS_BUFFER_FOR_A_BLOCK; \ \ /* NOTE: This "rounding up" of the last upanel is absolutely necessary since we NEED that last micropanel to have the same ldim (cs_p) as the other micropanels. Why? Because the microkernel assumes that the register (MR, NR) AND storage (PACKMR, PACKNR) blocksizes do not change. */ \ const dim_t m_pack = ( m / mr + ( m % mr ? 1 : 0 ) ) * mr; \ const dim_t k_pack = k; \ \ /* Barrier to make sure all threads are caught up and ready to begin the packm stage. */ \ bli_thrinfo_barrier( thread ); \ \ /* Compute the size of the memory block eneded. */ \ siz_t size_needed = sizeof( ctype ) * m_pack * k_pack; \ \ /* Check the mem_t entry provided by the caller. If it is unallocated, then we need to acquire a block from the packed block allocator. */ \ if ( bli_mem_is_unalloc( mem ) ) \ { \ if ( bli_thread_am_ochief( thread ) ) \ { \ /* Acquire directly to the chief thread's mem_t that was passed in. It needs to be that mem_t struct, and not a local (temporary) mem_t, since there is no barrier until after packing is finished, which could allow a race condition whereby the chief thread exits the current function before the other threads have a chance to copy from it. (A barrier would fix that race condition, but then again, I prefer to keep barriers to a minimum.) */ \ bli_pba_acquire_m \ ( \ rntm, \ size_needed, \ pack_buf_type, \ mem \ ); \ } \ \ /* Broadcast the address of the chief thread's passed-in mem_t to all threads. */ \ mem_t* mem_p = bli_thrinfo_broadcast( thread, mem ); \ \ /* Non-chief threads: Copy the contents of the chief thread's passed-in mem_t to the passed-in mem_t for this thread. (The chief thread already has the mem_t, so it does not need to perform any copy.) */ \ if ( !bli_thread_am_ochief( thread ) ) \ { \ *mem = *mem_p; \ } \ } \ else /* if ( bli_mem_is_alloc( mem ) ) */ \ { \ /* If the mem_t entry provided by the caller does NOT contain a NULL buffer, then a block has already been acquired from the packed block allocator and cached by the caller. */ \ \ /* As a sanity check, we should make sure that the mem_t object isn't associated with a block that is too small compared to the size of the packed matrix buffer that is needed, according to the value computed above. */ \ siz_t mem_size = bli_mem_size( mem ); \ \ if ( mem_size < size_needed ) \ { \ if ( bli_thread_am_ochief( thread ) ) \ { \ /* The chief thread releases the existing block associated with the mem_t, and then re-acquires a new block, saving the associated mem_t to its passed-in mem_t. (See coment above for why the acquisition needs to be directly to the chief thread's passed-in mem_t and not a local (temporary) mem_t. */ \ bli_pba_release \ ( \ rntm, \ mem \ ); \ bli_pba_acquire_m \ ( \ rntm, \ size_needed, \ pack_buf_type, \ mem \ ); \ } \ \ /* Broadcast the address of the chief thread's passed-in mem_t to all threads. */ \ mem_t* mem_p = bli_thrinfo_broadcast( thread, mem ); \ \ /* Non-chief threads: Copy the contents of the chief thread's passed-in mem_t to the passed-in mem_t for this thread. (The chief thread already has the mem_t, so it does not need to perform any copy.) */ \ if ( !bli_thread_am_ochief( thread ) ) \ { \ *mem = *mem_p; \ } \ } \ else \ { \ /* If the mem_t entry is already allocated and sufficiently large, then we use it as-is. No action is needed. */ \ } \ } \ } //INSERT_GENTFUNC_BASIC0( packm_init_mem_a ) GENTFUNC( float, s, packm_init_mem_a ) GENTFUNC( double, d, packm_init_mem_a ) GENTFUNC( scomplex, c, packm_init_mem_a ) GENTFUNC( dcomplex, z, packm_init_mem_a ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ if ( thread != NULL ) \ if ( bli_thread_am_ochief( thread ) ) \ { \ /* Check the mem_t entry provided by the caller. Only proceed if it is allocated, which it should be. */ \ if ( bli_mem_is_alloc( mem ) ) \ { \ bli_pba_release \ ( \ rntm, \ mem \ ); \ } \ } \ } //INSERT_GENTFUNC_BASIC0( packm_finalize_mem_a ) GENTFUNC( float, s, packm_finalize_mem_a ) GENTFUNC( double, d, packm_finalize_mem_a ) GENTFUNC( scomplex, c, packm_finalize_mem_a ) GENTFUNC( dcomplex, z, packm_finalize_mem_a ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ pack_t* restrict schema, \ dim_t m, \ dim_t k, \ dim_t mr, \ dim_t* restrict m_max, \ dim_t* restrict k_max, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ mem_t* restrict mem \ ) \ { \ /* NOTE: This "rounding up" of the last upanel is absolutely necessary since we NEED that last micropanel to have the same ldim (cs_p) as the other micropanels. Why? Because the microkernel assumes that the register (MR, NR) AND storage (PACKMR, PACKNR) blocksizes do not change. */ \ *m_max = ( m / mr + ( m % mr ? 1 : 0 ) ) * mr; \ *k_max = k; \ \ /* Determine the dimensions and strides for the packed matrix A. */ \ { \ /* Pack A to column-stored row-panels. */ \ *rs_p = 1; \ *cs_p = mr; \ \ *pd_p = mr; \ *ps_p = mr * k; \ \ /* Set the schema to "packed row panels" to indicate packing to conventional column-stored row panels. */ \ *schema = BLIS_PACKED_ROW_PANELS; \ } \ \ /* Set the buffer address provided by the caller to point to the memory associated with the mem_t entry acquired from the memory pool. */ \ *p = bli_mem_buffer( mem ); \ } //INSERT_GENTFUNC_BASIC0( packm_init_a ) GENTFUNC( float, s, packm_init_a ) GENTFUNC( double, d, packm_init_a ) GENTFUNC( scomplex, c, packm_init_a ) GENTFUNC( dcomplex, z, packm_init_a ) // // Define BLAS-like interfaces to the variant chooser. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ conj_t conj, \ dim_t m_alloc, \ dim_t k_alloc, \ dim_t m, \ dim_t k, \ dim_t mr, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ pack_t schema; \ dim_t m_max; \ dim_t k_max; \ dim_t pd_p; \ \ /* Prepare the packing destination buffer. */ \ PASTECH2(bao_,ch,packm_init_mem_a) \ ( \ m_alloc, k_alloc, mr, \ cntx, \ rntm, \ mem, \ thread \ ); \ \ /* Determine the packing buffer and related parameters for matrix A. */ \ PASTECH2(bao_,ch,packm_init_a) \ ( \ &schema, \ m, k, mr, \ &m_max, &k_max, \ p, rs_p, cs_p, \ &pd_p, ps_p, \ mem \ ); \ \ /* Pack matrix A to the destination buffer chosen above. Here, the packed matrix is stored to column-stored MR x k micropanels. */ \ PASTECH2(bao_,ch,packm_var1) \ ( \ conj, \ schema, \ m, \ k, \ m_max, \ k_max, \ kappa, \ d, incd, \ a, rs_a, cs_a, \ *p, *rs_p, *cs_p, \ pd_p, *ps_p, \ cntx, \ thread \ ); \ \ /* Barrier so that packing is done before computation. */ \ bli_thrinfo_barrier( thread ); \ } //INSERT_GENTFUNC_BASIC0( packm_a ) GENTFUNC( float, s, packm_a ) GENTFUNC( double, d, packm_a ) GENTFUNC( scomplex, c, packm_a ) GENTFUNC( dcomplex, z, packm_a ) blis-1.1/addon/old/gemmd/bao_l3_packm_a.h000066400000000000000000000102211474157777200202440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ dim_t m, \ dim_t k, \ dim_t mr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ //INSERT_GENTPROT_BASIC0( packm_init_mem_a ) GENTPROT( float, s, packm_init_mem_a ) GENTPROT( double, d, packm_init_mem_a ) GENTPROT( scomplex, c, packm_init_mem_a ) GENTPROT( dcomplex, z, packm_init_mem_a ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ //INSERT_GENTPROT_BASIC0( packm_finalize_mem_a ) GENTPROT( float, s, packm_finalize_mem_a ) GENTPROT( double, d, packm_finalize_mem_a ) GENTPROT( scomplex, c, packm_finalize_mem_a ) GENTPROT( dcomplex, z, packm_finalize_mem_a ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ pack_t* restrict schema, \ dim_t m, \ dim_t k, \ dim_t mr, \ dim_t* restrict m_max, \ dim_t* restrict k_max, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ mem_t* restrict mem \ ); \ //INSERT_GENTPROT_BASIC0( packm_init_a ) GENTPROT( float, s, packm_init_a ) GENTPROT( double, d, packm_init_a ) GENTPROT( scomplex, c, packm_init_a ) GENTPROT( dcomplex, z, packm_init_a ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ conj_t conj, \ dim_t m_alloc, \ dim_t k_alloc, \ dim_t m, \ dim_t k, \ dim_t mr, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ //INSERT_GENTPROT_BASIC0( packm_a ) GENTPROT( float, s, packm_a ) GENTPROT( double, d, packm_a ) GENTPROT( scomplex, c, packm_a ) GENTPROT( dcomplex, z, packm_a ) blis-1.1/addon/old/gemmd/bao_l3_packm_b.c000066400000000000000000000241111474157777200202430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ dim_t k, \ dim_t n, \ dim_t nr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ /* Set the pack buffer type so that we are obtaining memory blocks from the pool dedicated to panels of B. */ \ const packbuf_t pack_buf_type = BLIS_BUFFER_FOR_B_PANEL; \ \ /* NOTE: This "rounding up" of the last upanel is absolutely necessary since we NEED that last micropanel to have the same ldim (cs_p) as the other micropanels. Why? Because the microkernel assumes that the register (MR, NR) AND storage (PACKMR, PACKNR) blocksizes do not change. */ \ const dim_t k_pack = k; \ const dim_t n_pack = ( n / nr + ( n % nr ? 1 : 0 ) ) * nr; \ \ /* Barrier to make sure all threads are caught up and ready to begin the packm stage. */ \ bli_thrinfo_barrier( thread ); \ \ /* Compute the size of the memory block eneded. */ \ siz_t size_needed = sizeof( ctype ) * k_pack * n_pack; \ \ /* Check the mem_t entry provided by the caller. If it is unallocated, then we need to acquire a block from the packed block allocator. */ \ if ( bli_mem_is_unalloc( mem ) ) \ { \ if ( bli_thread_am_ochief( thread ) ) \ { \ /* Acquire directly to the chief thread's mem_t that was passed in. It needs to be that mem_t struct, and not a local (temporary) mem_t, since there is no barrier until after packing is finished, which could allow a race condition whereby the chief thread exits the current function before the other threads have a chance to copy from it. (A barrier would fix that race condition, but then again, I prefer to keep barriers to a minimum.) */ \ bli_pba_acquire_m \ ( \ rntm, \ size_needed, \ pack_buf_type, \ mem \ ); \ } \ \ /* Broadcast the address of the chief thread's passed-in mem_t to all threads. */ \ mem_t* mem_p = bli_thrinfo_broadcast( thread, mem ); \ \ /* Non-chief threads: Copy the contents of the chief thread's passed-in mem_t to the passed-in mem_t for this thread. (The chief thread already has the mem_t, so it does not need to perform any copy.) */ \ if ( !bli_thread_am_ochief( thread ) ) \ { \ *mem = *mem_p; \ } \ } \ else /* if ( bli_mem_is_alloc( mem ) ) */ \ { \ /* If the mem_t entry provided by the caller does NOT contain a NULL buffer, then a block has already been acquired from the packed block allocator and cached by the caller. */ \ \ /* As a sanity check, we should make sure that the mem_t object isn't associated with a block that is too small compared to the size of the packed matrix buffer that is needed, according to the value computed above. */ \ siz_t mem_size = bli_mem_size( mem ); \ \ if ( mem_size < size_needed ) \ { \ if ( bli_thread_am_ochief( thread ) ) \ { \ /* The chief thread releases the existing block associated with the mem_t, and then re-acquires a new block, saving the associated mem_t to its passed-in mem_t. (See coment above for why the acquisition needs to be directly to the chief thread's passed-in mem_t and not a local (temporary) mem_t. */ \ bli_pba_release \ ( \ rntm, \ mem \ ); \ bli_pba_acquire_m \ ( \ rntm, \ size_needed, \ pack_buf_type, \ mem \ ); \ } \ \ /* Broadcast the address of the chief thread's passed-in mem_t to all threads. */ \ mem_t* mem_p = bli_thrinfo_broadcast( thread, mem ); \ \ /* Non-chief threads: Copy the contents of the chief thread's passed-in mem_t to the passed-in mem_t for this thread. (The chief thread already has the mem_t, so it does not need to perform any copy.) */ \ if ( !bli_thread_am_ochief( thread ) ) \ { \ *mem = *mem_p; \ } \ } \ else \ { \ /* If the mem_t entry is already allocated and sufficiently large, then we use it as-is. No action is needed. */ \ } \ } \ } //INSERT_GENTFUNC_BASIC0( packm_init_mem_b ) GENTFUNC( float, s, packm_init_mem_b ) GENTFUNC( double, d, packm_init_mem_b ) GENTFUNC( scomplex, c, packm_init_mem_b ) GENTFUNC( dcomplex, z, packm_init_mem_b ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ if ( thread != NULL ) \ if ( bli_thread_am_ochief( thread ) ) \ { \ /* Check the mem_t entry provided by the caller. Only proceed if it is allocated, which it should be. */ \ if ( bli_mem_is_alloc( mem ) ) \ { \ bli_pba_release \ ( \ rntm, \ mem \ ); \ } \ } \ } //INSERT_GENTFUNC_BASIC0( packm_finalize_mem_b ) GENTFUNC( float, s, packm_finalize_mem_b ) GENTFUNC( double, d, packm_finalize_mem_b ) GENTFUNC( scomplex, c, packm_finalize_mem_b ) GENTFUNC( dcomplex, z, packm_finalize_mem_b ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ pack_t* restrict schema, \ dim_t k, \ dim_t n, \ dim_t nr, \ dim_t* restrict k_max, \ dim_t* restrict n_max, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ mem_t* restrict mem \ ) \ { \ /* NOTE: This "rounding up" of the last upanel is absolutely necessary since we NEED that last micropanel to have the same ldim (cs_p) as the other micropanels. Why? Because the microkernel assumes that the register (MR, NR) AND storage (PACKMR, PACKNR) blocksizes do not change. */ \ *k_max = k; \ *n_max = ( n / nr + ( n % nr ? 1 : 0 ) ) * nr; \ \ /* Determine the dimensions and strides for the packed matrix B. */ \ { \ /* Pack B to row-stored column-panels. */ \ *rs_p = nr; \ *cs_p = 1; \ \ *pd_p = nr; \ *ps_p = k * nr; \ \ /* Set the schema to "packed column panels" to indicate packing to conventional row-stored column panels. */ \ *schema = BLIS_PACKED_COL_PANELS; \ } \ \ /* Set the buffer address provided by the caller to point to the memory associated with the mem_t entry acquired from the memory pool. */ \ *p = bli_mem_buffer( mem ); \ } //INSERT_GENTFUNC_BASIC0( packm_init_b ) GENTFUNC( float, s, packm_init_b ) GENTFUNC( double, d, packm_init_b ) GENTFUNC( scomplex, c, packm_init_b ) GENTFUNC( dcomplex, z, packm_init_b ) // // Define BLAS-like interfaces to the variant chooser. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ conj_t conj, \ dim_t k_alloc, \ dim_t n_alloc, \ dim_t k, \ dim_t n, \ dim_t nr, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ pack_t schema; \ dim_t k_max; \ dim_t n_max; \ dim_t pd_p; \ \ /* Prepare the packing destination buffer. */ \ PASTECH2(bao_,ch,packm_init_mem_b) \ ( \ k_alloc, n_alloc, nr, \ cntx, \ rntm, \ mem, \ thread \ ); \ \ /* Determine the packing buffer and related parameters for matrix B. */ \ PASTECH2(bao_,ch,packm_init_b) \ ( \ &schema, \ k, n, nr, \ &k_max, &n_max, \ p, rs_p, cs_p, \ &pd_p, ps_p, \ mem \ ); \ \ /* Pack matrix B to the destination buffer chosen above. Here, the packed matrix is stored to row-stored k x NR micropanels. */ \ PASTECH2(bao_,ch,packm_var1) \ ( \ conj, \ schema, \ k, \ n, \ k_max, \ n_max, \ kappa, \ d, incd, \ b, rs_b, cs_b, \ *p, *rs_p, *cs_p, \ pd_p, *ps_p, \ cntx, \ thread \ ); \ \ /* Barrier so that packing is done before computation. */ \ bli_thrinfo_barrier( thread ); \ } //INSERT_GENTFUNC_BASIC0( packm_b ) GENTFUNC( float, s, packm_b ) GENTFUNC( double, d, packm_b ) GENTFUNC( scomplex, c, packm_b ) GENTFUNC( dcomplex, z, packm_b ) blis-1.1/addon/old/gemmd/bao_l3_packm_b.h000066400000000000000000000102211474157777200202450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ dim_t k, \ dim_t n, \ dim_t nr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ //INSERT_GENTPROT_BASIC0( packm_init_mem_b ) GENTPROT( float, s, packm_init_mem_b ) GENTPROT( double, d, packm_init_mem_b ) GENTPROT( scomplex, c, packm_init_mem_b ) GENTPROT( dcomplex, z, packm_init_mem_b ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ //INSERT_GENTPROT_BASIC0( packm_finalize_mem_b ) GENTPROT( float, s, packm_finalize_mem_b ) GENTPROT( double, d, packm_finalize_mem_b ) GENTPROT( scomplex, c, packm_finalize_mem_b ) GENTPROT( dcomplex, z, packm_finalize_mem_b ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ pack_t* restrict schema, \ dim_t k, \ dim_t n, \ dim_t nr, \ dim_t* restrict k_max, \ dim_t* restrict n_max, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ mem_t* restrict mem \ ); \ //INSERT_GENTPROT_BASIC0( packm_init_b ) GENTPROT( float, s, packm_init_b ) GENTPROT( double, d, packm_init_b ) GENTPROT( scomplex, c, packm_init_b ) GENTPROT( dcomplex, z, packm_init_b ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ conj_t conj, \ dim_t k_alloc, \ dim_t n_alloc, \ dim_t k, \ dim_t n, \ dim_t nr, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ //INSERT_GENTPROT_BASIC0( packm_b ) GENTPROT( float, s, packm_b ) GENTPROT( double, d, packm_b ) GENTPROT( scomplex, c, packm_b ) GENTPROT( dcomplex, z, packm_b ) blis-1.1/addon/old/gemmd/bao_l3_packm_var.h000066400000000000000000000052171474157777200206250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces to the variants. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* restrict cntx, \ thrinfo_t* restrict thread \ ); //INSERT_GENTPROT_BASIC0( packm_var1 ) GENTPROT( float, s, packm_var1 ) GENTPROT( double, d, packm_var1 ) GENTPROT( scomplex, c, packm_var1 ) GENTPROT( dcomplex, z, packm_var1 ) //INSERT_GENTPROT_BASIC0( packm_var2 ) GENTPROT( float, s, packm_var2 ) GENTPROT( double, d, packm_var2 ) GENTPROT( scomplex, c, packm_var2 ) GENTPROT( dcomplex, z, packm_var2 ) blis-1.1/addon/old/gemmd/bao_l3_packm_var1.c000066400000000000000000000145701474157777200207030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Variant 1 provides basic support for packing by calling packm_cxk(). // #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* restrict cntx, \ thrinfo_t* restrict thread \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict c_cast = c; \ ctype* restrict p_cast = p; \ \ dim_t iter_dim; \ dim_t n_iter; \ dim_t it, ic; \ dim_t ic0; \ doff_t ic_inc; \ dim_t panel_len; \ dim_t panel_len_max; \ dim_t panel_dim; \ dim_t panel_dim_max; \ inc_t incc; \ inc_t ldc; \ inc_t ldp; \ conj_t conjc; \ \ \ /* Extract the conjugation bit from the transposition argument. */ \ conjc = bli_extract_conj( transc ); \ \ /* Create flags to incidate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ bool row_stored = bli_is_col_packed( schema ); \ /*bool col_stored = bli_is_row_packed( schema );*/ \ \ /* If the row storage flag indicates row storage, then we are packing to column panels; otherwise, if the strides indicate column storage, we are packing to row panels. */ \ if ( row_stored ) \ { \ /* Prepare to pack to row-stored column panels. */ \ iter_dim = n; \ panel_len = m; \ panel_len_max = m_max; \ panel_dim_max = pd_p; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( col_stored ) */ \ { \ /* Prepare to pack to column-stored row panels. */ \ iter_dim = m; \ panel_len = n; \ panel_len_max = n_max; \ panel_dim_max = pd_p; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ /* Compute the total number of iterations we'll need. */ \ n_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \ \ /* Set the initial values and increments for indices related to C and P based on whether reverse iteration was requested. */ \ { \ ic0 = 0; \ ic_inc = panel_dim_max; \ } \ \ ctype* restrict p_begin = p_cast; \ \ /* Query the number of threads and thread ids from the current thread's packm thrinfo_t node. */ \ const dim_t nt = bli_thrinfo_n_way( thread ); \ const dim_t tid = bli_thrinfo_work_id( thread ); \ \ /* Suppress warnings in case tid isn't used (ie: as in slab partitioning). */ \ ( void )nt; \ ( void )tid; \ \ dim_t it_start, it_end, it_inc; \ \ /* Determine the thread range and increment using the current thread's packm thrinfo_t node. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &it_start, &it_end, &it_inc ); \ \ /* Iterate over every logical micropanel in the source matrix. */ \ for ( ic = ic0, it = 0; it < n_iter; \ ic += ic_inc, it += 1 ) \ { \ panel_dim = bli_min( panel_dim_max, iter_dim - ic ); \ \ ctype* restrict c_begin = c_cast + (ic )*incc; \ \ ctype* restrict c_use = c_begin; \ ctype* restrict p_use = p_begin; \ \ /* The definition of bli_packm_my_iter() will depend on whether slab or round-robin partitioning was requested at configure-time. (The default is slab.) */ \ if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) ) \ { \ PASTECH2(bao_,ch,packm_cxk) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa_cast, \ d, incd, \ c_use, incc, ldc, \ p_use, ldp, \ cntx \ ); \ } \ \ /* if ( !row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var1: a packed", panel_dim_max, panel_len_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ else \ PASTEMAC(ch,fprintm)( stdout, "packm_var1: b packed", panel_len_max, panel_dim_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ */ \ \ p_begin += ps_p; \ } \ } //INSERT_GENTFUNC_BASIC0( packm_var1 ) GENTFUNC( float, s, packm_var1 ) GENTFUNC( double, d, packm_var1 ) GENTFUNC( scomplex, c, packm_var1 ) GENTFUNC( dcomplex, z, packm_var1 ) blis-1.1/addon/old/gemmd/bao_l3_packm_var2.c000066400000000000000000000174751474157777200207130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Variant 2 is similar to variant 1, but inlines the contents of packm_cxk(). // #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict d, inc_t incd, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* restrict cntx, \ thrinfo_t* restrict thread \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict c_cast = c; \ ctype* restrict p_cast = p; \ \ dim_t iter_dim; \ dim_t n_iter; \ dim_t it, ic; \ dim_t ic0; \ doff_t ic_inc; \ dim_t panel_len; \ dim_t panel_len_max; \ dim_t panel_dim; \ dim_t panel_dim_max; \ inc_t incc; \ inc_t ldc; \ inc_t ldp; \ conj_t conjc; \ \ \ /* Extract the conjugation bit from the transposition argument. */ \ conjc = bli_extract_conj( transc ); \ \ /* Create flags to incidate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ bool row_stored = bli_is_col_packed( schema ); \ /*bool col_stored = bli_is_row_packed( schema );*/ \ \ /* If the row storage flag indicates row storage, then we are packing to column panels; otherwise, if the strides indicate column storage, we are packing to row panels. */ \ if ( row_stored ) \ { \ /* Prepare to pack to row-stored column panels. */ \ iter_dim = n; \ panel_len = m; \ panel_len_max = m_max; \ panel_dim_max = pd_p; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( col_stored ) */ \ { \ /* Prepare to pack to column-stored row panels. */ \ iter_dim = m; \ panel_len = n; \ panel_len_max = n_max; \ panel_dim_max = pd_p; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ /* Compute the total number of iterations we'll need. */ \ n_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \ \ /* Set the initial values and increments for indices related to C and P based on whether reverse iteration was requested. */ \ { \ ic0 = 0; \ ic_inc = panel_dim_max; \ } \ \ ctype* restrict p_begin = p_cast; \ \ /* Query the number of threads and thread ids from the current thread's packm thrinfo_t node. */ \ const dim_t nt = bli_thrinfo_n_way( thread ); \ const dim_t tid = bli_thrinfo_work_id( thread ); \ \ /* Suppress warnings in case tid isn't used (ie: as in slab partitioning). */ \ ( void )nt; \ ( void )tid; \ \ dim_t it_start, it_end, it_inc; \ \ /* Determine the thread range and increment using the current thread's packm thrinfo_t node. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &it_start, &it_end, &it_inc ); \ \ /* Iterate over every logical micropanel in the source matrix. */ \ for ( ic = ic0, it = 0; it < n_iter; \ ic += ic_inc, it += 1 ) \ { \ panel_dim = bli_min( panel_dim_max, iter_dim - ic ); \ \ ctype* restrict c_begin = c_cast + (ic )*incc; \ \ ctype* restrict c_use = c_begin; \ ctype* restrict p_use = p_begin; \ \ /* The definition of bli_packm_my_iter() will depend on whether slab or round-robin partitioning was requested at configure-time. (The default is slab.) */ \ if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) ) \ { \ /* NOTE: We assume here that kappa = 1 and therefore ignore it. If we're wrong, this will get someone's attention. */ \ if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) \ bli_abort(); \ \ /* Perform the packing, taking conjc into account. */ \ if ( bli_is_conj( conjc ) ) \ { \ for ( dim_t l = 0; l < panel_len; ++l ) \ { \ for ( dim_t d = 0; d < panel_dim; ++d ) \ { \ ctype* cld = c_use + (l )*ldc + (d )*incc; \ ctype* pld = p_use + (l )*ldp + (d )*1; \ \ PASTEMAC(ch,copyjs)( *cld, *pld ); \ } \ } \ } \ else \ { \ for ( dim_t l = 0; l < panel_len; ++l ) \ { \ for ( dim_t d = 0; d < panel_dim; ++d ) \ { \ ctype* cld = c_use + (l )*ldc + (d )*incc; \ ctype* pld = p_use + (l )*ldp + (d )*1; \ \ PASTEMAC(ch,copys)( *cld, *pld ); \ } \ } \ } \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim < panel_dim_max ) \ { \ const dim_t i = panel_dim; \ const dim_t m_edge = panel_dim_max - panel_dim; \ const dim_t n_edge = panel_len_max; \ ctype* restrict p_edge = p_use + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. */ \ if ( panel_len < panel_len_max ) \ { \ const dim_t j = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - panel_len; \ ctype* restrict p_edge = p_use + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ /* if ( !row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var1: a packed", panel_dim_max, panel_len_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ else \ PASTEMAC(ch,fprintm)( stdout, "packm_var1: b packed", panel_len_max, panel_dim_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ */ \ \ p_begin += ps_p; \ } \ } //INSERT_GENTFUNC_BASIC0( packm_var1 ) GENTFUNC( float, s, packm_var2 ) GENTFUNC( double, d, packm_var2 ) GENTFUNC( scomplex, c, packm_var2 ) GENTFUNC( dcomplex, z, packm_var2 ) blis-1.1/addon/old/gemmd/bao_packm_cxk.c000066400000000000000000000136171474157777200202220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTECH2(bao_,ch,opname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* kappa, \ ctype* d, inc_t incd, \ ctype* a, inc_t inca, inc_t lda, \ ctype* p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Note that we use panel_dim_max, not panel_dim, to query the packm kernel function pointer. This means that we always use the same kernel, even for edge cases. */ \ num_t dt = PASTEMAC(ch,type); \ ukr_t ker_id = bli_is_col_packed( schema ) ? BLIS_PACKM_NRXK_KER : BLIS_PACKM_MRXK_KER; \ \ PASTECH2(ch,opname,_ker_ft) f; \ \ /* Query the context for the packm kernel corresponding to the current panel dimension, or kernel id. If the id is invalid, the function will return NULL. */ \ f = bli_cntx_get_ukr_dt( dt, ker_id, cntx ); \ \ /* If there exists a kernel implementation for the micro-panel dimension provided, we invoke the implementation. Otherwise, we use scal2m. */ \ /* NOTE: We've disabled calling packm micro-kernels from the context for this implementation. To re-enable, change FALSE to TRUE in the conditional below. */ \ if ( f != NULL && FALSE ) \ { \ f \ ( \ conja, \ schema, \ panel_dim, \ panel_len, \ panel_len_max, \ kappa, \ a, inca, lda, \ p, ldp, \ cntx \ ); \ } \ else \ { \ /* NOTE: We assume here that kappa = 1 and therefore ignore it. If we're wrong, this will get someone's attention. */ \ if ( !PASTEMAC(ch,eq1)( *kappa ) ) \ bli_abort(); \ \ if ( d == NULL ) \ { \ /* Perform the packing, taking conja into account. */ \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t l = 0; l < panel_len; ++l ) \ { \ for ( dim_t i = 0; i < panel_dim; ++i ) \ { \ ctype* ali = a + (l )*lda + (i )*inca; \ ctype* pli = p + (l )*ldp + (i )*1; \ \ PASTEMAC(ch,copyjs)( *ali, *pli ); \ } \ } \ } \ else \ { \ for ( dim_t l = 0; l < panel_len; ++l ) \ { \ for ( dim_t i = 0; i < panel_dim; ++i ) \ { \ ctype* ali = a + (l )*lda + (i )*inca; \ ctype* pli = p + (l )*ldp + (i )*1; \ \ PASTEMAC(ch,copys)( *ali, *pli ); \ } \ } \ } \ } \ else /* if ( d != NULL ) */ \ { \ /* Perform the packing, taking conja into account. */ \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t l = 0; l < panel_len; ++l ) \ { \ for ( dim_t i = 0; i < panel_dim; ++i ) \ { \ ctype* ali = a + (l )*lda + (i )*inca; \ ctype* dl = d + (l )*incd; \ ctype* pli = p + (l )*ldp + (i )*1; \ \ /* Note that ali must be the second operand here since that is what is conjugated by scal2js. */ \ PASTEMAC(ch,scal2js)( *dl, *ali, *pli ); \ } \ } \ } \ else \ { \ for ( dim_t l = 0; l < panel_len; ++l ) \ { \ for ( dim_t i = 0; i < panel_dim; ++i ) \ { \ ctype* ali = a + (l )*lda + (i )*inca; \ ctype* dl = d + (l )*incd; \ ctype* pli = p + (l )*ldp + (i )*1; \ \ PASTEMAC(ch,scal2s)( *ali, *dl, *pli ); \ } \ } \ } \ } \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim < panel_dim_max ) \ { \ const dim_t i = panel_dim; \ const dim_t m_edge = panel_dim_max - panel_dim; \ const dim_t n_edge = panel_len_max; \ ctype* restrict p_edge = p + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. */ \ if ( panel_len < panel_len_max ) \ { \ const dim_t j = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - panel_len; \ ctype* restrict p_edge = p + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ } //INSERT_GENTFUNC_BASIC0( packm_cxk ) GENTFUNC( float, s, packm_cxk ) GENTFUNC( double, d, packm_cxk ) GENTFUNC( scomplex, c, packm_cxk ) GENTFUNC( dcomplex, z, packm_cxk ) blis-1.1/addon/old/gemmd/bao_packm_cxk.h000066400000000000000000000043721474157777200202250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTECH2(bao_,ch,varname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* kappa, \ ctype* d, inc_t incd, \ ctype* a, inc_t inca, inc_t lda, \ ctype* p, inc_t ldp, \ cntx_t* cntx \ ); //INSERT_GENTPROT_BASIC0( packm_cxk ) GENTPROT( float, s, packm_cxk ) GENTPROT( double, d, packm_cxk ) GENTPROT( scomplex, c, packm_cxk ) GENTPROT( dcomplex, z, packm_cxk ) blis-1.1/addon/old/gemmd/gemmd.h000066400000000000000000000037431474157777200165360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of copyright holder(s) nor the names contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef GEMMD_H #define GEMMD_H // This header should contain (or #include) any definitions that must be // folded into blis.h. #include "bao_gemmd.h" #include "bao_gemmd_check.h" #include "bao_gemmd_var.h" #include "bao_l3_packm_a.h" #include "bao_l3_packm_b.h" #include "bao_l3_packm_var.h" #include "bao_packm_cxk.h" #include "bao_l3_decor.h" #endif blis-1.1/addon/old/gemmd/thread/000077500000000000000000000000001474157777200165345ustar00rootroot00000000000000blis-1.1/addon/old/gemmd/thread/bao_l3_decor.c000066400000000000000000000107461474157777200212230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Initialize a function pointer array containing function addresses for // each of the threading-specific level-3 thread decorators. static l3ao_decor_ft l3ao_decor_fpa[ BLIS_NUM_THREAD_IMPLS ] = { [BLIS_SINGLE] = bao_l3_thread_decorator_single, [BLIS_OPENMP] = #if defined(BLIS_ENABLE_OPENMP) bao_l3_thread_decorator_openmp, #elif defined(BLIS_ENABLE_PTHREADS) NULL, #else NULL, #endif [BLIS_POSIX] = #if defined(BLIS_ENABLE_PTHREADS) bao_l3_thread_decorator_pthreads, #elif defined(BLIS_ENABLE_OPENMP) NULL, #else NULL, #endif }; // Define a dispatcher that chooses a threading-specific function from the // above function pointer array. void bao_l3_thread_decorator ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { rntm_t rntm_l; // Query the threading implementation and the number of threads requested. timpl_t ti = bli_rntm_thread_impl( rntm ); dim_t nt = bli_rntm_num_threads( rntm ); if ( bli_error_checking_is_enabled() ) bao_l3_thread_decorator_check( rntm ); if ( 1 < nt && ti == BLIS_SINGLE ) { // Here, we resolve conflicting information. The caller requested // a sequential threading implementation, but also requested more // than one thread. Here, we choose to favor the requested threading // implementation over the number of threads, and so reset all // parallelism parameters to 1. rntm_l = *rntm; nt = 1; bli_rntm_set_ways_only( 1, 1, 1, 1, 1, &rntm_l ); bli_rntm_set_num_threads_only( 1, &rntm_l ); rntm = &rntm_l; } // Use the timpl_t value to index into the corresponding function address // from the function pointer array. const l3ao_decor_ft fp = l3ao_decor_fpa[ ti ]; // Call the threading-specific decorator function. fp ( func, family, alpha, a, d, b, beta, c, cntx, rntm ); } void bao_l3_thread_decorator_check ( rntm_t* rntm ) { //err_t e_val; //e_val = bli_check_valid_thread_impl( bli_rntm_thread_impl( rntm ) ); //bli_check_error_code( e_val ); const timpl_t ti = bli_rntm_thread_impl( rntm ); if ( #ifndef BLIS_ENABLE_OPENMP ti == BLIS_OPENMP || #endif #ifndef BLIS_ENABLE_PTHREADS ti == BLIS_POSIX || #endif FALSE ) { fprintf( stderr, "\n" ); fprintf( stderr, "libblis: User requested threading implementation \"%s\", but that method is\n", ( ti == BLIS_OPENMP ? "openmp" : "pthreads" ) ); fprintf( stderr, "libblis: unavailable. Try reconfiguring BLIS with \"-t %s\" and recompiling.\n", ( ti == BLIS_OPENMP ? "openmp" : "pthreads" ) ); fprintf( stderr, "libblis: %s: line %d\n", __FILE__, ( int )__LINE__ ); bli_abort(); } } blis-1.1/addon/old/gemmd/thread/bao_l3_decor.h000066400000000000000000000054341474157777200212260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Level-3 internal function type. typedef void (*l3aoint_ft) ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* d, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); // Level-3 thread decorator function type. typedef void (*l3ao_decor_ft) ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* b, obj_t* d, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); // Level-3 thread decorator prototype. void bao_l3_thread_decorator ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* b, obj_t* d, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); void bao_l3_thread_decorator_check ( rntm_t* rntm ); // Include definitions specific to the method of multithreading. #include "bao_l3_decor_single.h" #include "bao_l3_decor_openmp.h" #include "bao_l3_decor_pthreads.h" blis-1.1/addon/old/gemmd/thread/bao_l3_decor_openmp.c000066400000000000000000000111131474157777200225660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_OPENMP //#define PRINT_THRINFO void bao_l3_thread_decorator_openmp ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { // Query the total number of threads from the rntm_t object. const dim_t n_threads = bli_rntm_num_threads( rntm ); // NOTE: The sba was initialized in bli_init(). // Check out an array_t from the small block allocator. This is done // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. array_t* array = bli_sba_checkout_array( n_threads ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field // initialized and ready for the global communicator creation below. bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. This will be // inherited by all of the child threads when they make local copies of // the rntm below. bli_pba_rntm_set_pba( rntm ); // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* gl_comm = bli_thrcomm_create( rntm, n_threads ); _Pragma( "omp parallel num_threads(n_threads)" ) { // Create a thread-local copy of the master thread's rntm_t. This is // necessary since we want each thread to be able to track its own // small block pool_t as it executes down the function stack. rntm_t rntm_l = *rntm; rntm_t* restrict rntm_p = &rntm_l; // Query the thread's id from OpenMP. const dim_t tid = omp_get_thread_num(); // Check for a somewhat obscure OpenMP thread-mistmatch issue. bli_l3_thread_decorator_thread_check( n_threads, tid, gl_comm, rntm_p ); // Use the thread id to access the appropriate pool_t* within the // array_t, and use it to set the sba_pool field within the rntm_t. // If the pool_t* element within the array_t is NULL, it will first // be allocated/initialized. bli_sba_rntm_set_pool( tid, array, rntm_p ); thrinfo_t* thread = NULL; // Create the root node of the thread's thrinfo_t structure. bli_l3_sup_thrinfo_create_root( tid, gl_comm, rntm_p, &thread ); func ( alpha, a, d, b, beta, c, cntx, rntm_p, thread ); // Free the current thread's thrinfo_t structure. bli_l3_sup_thrinfo_free( rntm_p, thread ); } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called from the thread entry function). // Check the array_t back into the small block allocator. Similar to the // check-out, this is done using a lock embedded within the sba to ensure // mutual exclusion. bli_sba_checkin_array( array ); } #endif blis-1.1/addon/old/gemmd/thread/bao_l3_decor_openmp.h000066400000000000000000000040231474157777200225750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Definitions specific to situations when OpenMP multithreading is enabled. #ifdef BLIS_ENABLE_OPENMP void bao_l3_thread_decorator_openmp ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); #endif blis-1.1/addon/old/gemmd/thread/bao_l3_decor_pthreads.c000066400000000000000000000163701474157777200231140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_PTHREADS // A data structure to assist in passing operands to additional threads. typedef struct thread_data { l3aoint_ft func; opid_t family; obj_t* alpha; obj_t* a; obj_t* d; obj_t* b; obj_t* beta; obj_t* c; cntx_t* cntx; rntm_t* rntm; dim_t tid; thrcomm_t* gl_comm; array_t* array; } thread_data_t; // Entry point function for additional threads. void* bao_l3_thread_entry( void* data_void ) { thread_data_t* data = data_void; l3aoint_ft func = data->func; opid_t family = data->family; obj_t* alpha = data->alpha; obj_t* a = data->a; obj_t* d = data->d; obj_t* b = data->b; obj_t* beta = data->beta; obj_t* c = data->c; cntx_t* cntx = data->cntx; rntm_t* rntm = data->rntm; dim_t tid = data->tid; array_t* array = data->array; thrcomm_t* gl_comm = data->gl_comm; ( void )family; // Create a thread-local copy of the master thread's rntm_t. This is // necessary since we want each thread to be able to track its own // small block pool_t as it executes down the function stack. rntm_t rntm_l = *rntm; rntm_t* restrict rntm_p = &rntm_l; // Use the thread id to access the appropriate pool_t* within the // array_t, and use it to set the sba_pool field within the rntm_t. // If the pool_t* element within the array_t is NULL, it will first // be allocated/initialized. bli_sba_rntm_set_pool( tid, array, rntm_p ); thrinfo_t* thread = NULL; // Create the root node of the current thread's thrinfo_t structure. bli_l3_sup_thrinfo_create_root( tid, gl_comm, rntm_p, &thread ); func ( alpha, a, d, b, beta, c, cntx, rntm_p, thread ); // Free the current thread's thrinfo_t structure. bli_l3_sup_thrinfo_free( rntm_p, thread ); return NULL; } void bao_l3_thread_decorator_pthreads ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { err_t r_val; // Query the total number of threads from the context. const dim_t n_threads = bli_rntm_num_threads( rntm ); // NOTE: The sba was initialized in bli_init(). // Check out an array_t from the small block allocator. This is done // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. array_t* restrict array = bli_sba_checkout_array( n_threads ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we have the rntm_t.sba_pool field // initialized and ready for the global communicator creation below. bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. This will be // inherited by all of the child threads when they make local copies of // the rntm below. bli_pba_rntm_set_pba( rntm ); // Allocate a global communicator for the root thrinfo_t structures. thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); // Allocate an array of pthread objects and auxiliary data structs to pass // to the thread entry functions. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads, &r_val ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads, &r_val ); // NOTE: We must iterate backwards so that the chief thread (thread id 0) // can spawn all other threads before proceeding with its own computation. for ( dim_t tid = n_threads - 1; 0 <= tid; tid-- ) { // Set up thread data for additional threads (beyond thread 0). datas[tid].func = func; datas[tid].family = family; datas[tid].alpha = alpha; datas[tid].a = a; datas[tid].d = d; datas[tid].b = b; datas[tid].beta = beta; datas[tid].c = c; datas[tid].cntx = cntx; datas[tid].rntm = rntm; datas[tid].tid = tid; datas[tid].gl_comm = gl_comm; datas[tid].array = array; // Spawn additional threads for ids greater than 1. if ( tid != 0 ) bli_pthread_create( &pthreads[tid], NULL, &bao_l3_thread_entry, &datas[tid] ); else bao_l3_thread_entry( ( void* )(&datas[0]) ); } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called from the thread entry function). // Thread 0 waits for additional threads to finish. for ( dim_t tid = 1; tid < n_threads; tid++ ) { bli_pthread_join( pthreads[tid], NULL ); } // Check the array_t back into the small block allocator. Similar to the // check-out, this is done using a lock embedded within the sba to ensure // mutual exclusion. bli_sba_checkin_array( array ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif bli_free_intl( pthreads ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_l3_thread_decorator().pth: " ); #endif bli_free_intl( datas ); } #else // Define a dummy function bli_l3_thread_entry(), which is needed for // consistent dynamic linking behavior when building shared objects in Linux // or OSX, or Windows DLLs; otherwise, we risk having an unresolved symbol. void* bao_l3_thread_entry( void* data_void ) { return NULL; } #endif blis-1.1/addon/old/gemmd/thread/bao_l3_decor_pthreads.h000066400000000000000000000041461474157777200231170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Definitions specific to situations when POSIX multithreading is enabled. #ifdef BLIS_ENABLE_PTHREADS // Thread entry point prototype. void* bao_l3_thread_entry( void* data_void ); void bao_l3_thread_decorator_pthreads ( l3aoint_ft func, opid_t family, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); #endif blis-1.1/addon/old/gemmd/thread/bao_l3_decor_single.c000066400000000000000000000113171474157777200225570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define SKIP_THRINFO_TREE void bao_l3_thread_decorator_single ( l3aoint_ft func, opid_t family, //pack_t schema_a, //pack_t schema_b, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { // For sequential execution, we use only one thread. const dim_t n_threads = 1; // NOTE: The sba was initialized in bli_init(). // Check out an array_t from the small block allocator. This is done // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. array_t* restrict array = bli_sba_checkout_array( n_threads ); // Access the pool_t* for thread 0 and embed it into the rntm. bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. bli_pba_rntm_set_pba( rntm ); #ifndef SKIP_THRINFO_TREE // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); #endif { // NOTE: We don't need to create another copy of the rntm_t since // it was already copied in one of the high-level oapi functions. rntm_t* restrict rntm_p = rntm; // There is only one thread id (for the thief thread). const dim_t tid = 0; // Use the thread id to access the appropriate pool_t* within the // array_t, and use it to set the sba_pool field within the rntm_t. // If the pool_t* element within the array_t is NULL, it will first // be allocated/initialized. // NOTE: This is commented out because, in the single-threaded case, // this is redundant since it's already been done above. //bli_sba_rntm_set_pool( tid, array, rntm_p ); #ifndef SKIP_THRINFO_TREE thrinfo_t* thread = NULL; // Create the root node of the thread's thrinfo_t structure. bli_l3_sup_thrinfo_create_root( tid, gl_comm, rntm_p, &thread ); #else // This optimization allows us to use one of the global thrinfo_t // objects for single-threaded execution rather than grow one from // scratch. The key is that bli_thrinfo_sup_grow(), which is called // from within the variants, will immediately return if it detects // that the thrinfo_t* passed into it is either // &BLIS_GEMM_SINGLE_THREADED or &BLIS_PACKM_SINGLE_THREADED. thrinfo_t* thread = &BLIS_GEMM_SINGLE_THREADED; ( void )tid; #endif func ( alpha, a, d, b, beta, c, cntx, rntm_p, thread ); #ifndef SKIP_THRINFO_TREE // Free the current thread's thrinfo_t structure. bli_l3_sup_thrinfo_free( rntm_p, thread ); #endif } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called above). // Check the array_t back into the small block allocator. Similar to the // check-out, this is done using a lock embedded within the sba to ensure // mutual exclusion. bli_sba_checkin_array( array ); } blis-1.1/addon/old/gemmd/thread/bao_l3_decor_single.h000066400000000000000000000037361474157777200225720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bao_l3_thread_decorator_single ( l3aoint_ft func, opid_t family, //pack_t schema_a, //pack_t schema_b, obj_t* alpha, obj_t* a, obj_t* d, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); blis-1.1/blastest/000077500000000000000000000000001474157777200141525ustar00rootroot00000000000000blis-1.1/blastest/Makefile000066400000000000000000000165471474157777200156270ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for BLAS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all f2c bin \ clean cleanf2c cleanobj cleanbin cleanout \ check-env check-env-mk check-env-fragments check-env-make-defs \ run check # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := .. LIB_PATH = ../lib/$(CONFIG_NAME) INC_PATH = ../include/$(CONFIG_NAME) SHARE_PATH := .. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- General build definitions ------------------------------------------------ # TEST_OBJ_PATH := obj F2C_LIB := libf2c.a F2C_PATH := f2c DRIVER_PATH := src BLIS_H_PATH := $(BUILD_PATH)/$(BASE_INC_PATH) INPUT_DIR := input # Gather all local object files. F2C_OBJS := $(sort $(patsubst $(F2C_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(F2C_PATH)/*.c))) DRIVER_OBJS := $(sort $(patsubst $(DRIVER_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(DRIVER_PATH)/*.c))) # Extract base names for each test driver file. DRIVER_BASES := $(basename $(notdir $(DRIVER_OBJS))) # Binary executable names. DRIVER_BINS := $(addsuffix .x,$(DRIVER_BASES)) # Binary run-rule names DRIVER_BINS_R := $(addprefix run-,$(DRIVER_BASES)) # Filter level-1, level-2, and level-3 names to different variables. DRIVER1_BASES := $(filter %1,$(DRIVER_BASES)) DRIVER2_BASES := $(filter %2,$(DRIVER_BASES)) DRIVER3_BASES := $(filter %3,$(DRIVER_BASES)) # The location of the script that checks the BLAS test output. #BLASTEST_CHECK := $(DIST_PATH)/$(BUILD_DIR)/check-blastest.sh # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Suppress warnings about uninitialized functions, add local header # paths and the path to blis.h to CFLAGS. CFLAGS += -Wno-maybe-uninitialized -Wno-parentheses -Wfatal-errors \ -I$(F2C_PATH) \ -I$(INC_PATH) -DHAVE_BLIS_H # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Override the location of the check-blastest.sh script. #BLASTEST_CHECK := ./check-blastest.sh TESTSUITE_WRAPPER ?= # # --- Targets/rules ------------------------------------------------------------ # # --- Primary targets --- all: check-env f2c bin f2c: check-env $(F2C_LIB) bin: check-env $(DRIVER_BINS) # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(F2C_PATH)/%.c ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(CFLAGS) -c $< -o $@ else @echo "Compiling $@" @$(CC) $(CFLAGS) -c $< -o $@ endif $(TEST_OBJ_PATH)/%.o: $(DRIVER_PATH)/%.c ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(CFLAGS) -c $< -o $@ else @echo "Compiling $@" @$(CC) $(CFLAGS) -c $< -o $@ endif # -- libf2c library archive rule -- $(F2C_LIB): $(F2C_OBJS) ifeq ($(ENABLE_VERBOSE),yes) $(AR) $(ARFLAGS) $@ $? $(RANLIB) $@ else @echo "Archiving $@" @$(AR) $(ARFLAGS) $@ $? @$(RANLIB) $@ endif # -- Executable file rules -- # first argument: the base name of the BLAS test driver. define make-blat-rule $(1).x: $(TEST_OBJ_PATH)/$(1).o $(F2C_LIB) $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $(TEST_OBJ_PATH)/$(1).o $(F2C_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@ else @echo "Linking $$@ against '$(F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS)'" @$(LINKER) $(TEST_OBJ_PATH)/$(1).o $(F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@ endif endef # Instantiate the rule above for each driver file. $(foreach name, $(DRIVER_BASES), $(eval $(call make-blat-rule,$(name)))) # -- Test run rules -- run: $(DRIVER_BINS_R) # A rule to run ?blat1.x driver files. define make-run-blat1-rule run-$(1): $(1).x ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(1).x > out.$(1) else @echo "Running $(1).x > 'out.$(1)'" @$(TESTSUITE_WRAPPER) ./$(1).x > out.$(1) endif endef # Instantiate the rule above for each level-1 driver file. $(foreach name, $(DRIVER1_BASES), $(eval $(call make-run-blat1-rule,$(name)))) # A rule to run ?blat2.x and ?blat3.x driver files. define make-run-blat23-rule run-$(1): $(1).x ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(1).x < $(INPUT_DIR)/$(1).in else @echo "Running $(1).x < '$(INPUT_DIR)/$(1).in' (output to 'out.$(1)')" @$(TESTSUITE_WRAPPER) ./$(1).x < $(INPUT_DIR)/$(1).in endif endef # Instantiate the rule above for each level-2 driver file. $(foreach name, $(DRIVER2_BASES), $(eval $(call make-run-blat23-rule,$(name)))) # Instantiate the rule above for each level-3 driver file. $(foreach name, $(DRIVER3_BASES), $(eval $(call make-run-blat23-rule,$(name)))) check: run ifeq ($(ENABLE_VERBOSE),yes) - $(BLASTEST_CHECK) else @- $(BLASTEST_CHECK) endif # -- Clean rules -- cleanf2c: - $(RM_F) $(F2C_OBJS) $(F2C_LIB) cleanobj: - $(RM_F) $(DRIVER_OBJS) cleanbin: - $(RM_F) $(DRIVER_BINS) cleanout: - $(RM_F) $(addprefix out.,$(DRIVER_BASES)) clean: cleanf2c cleanobj cleanbin cleanout blis-1.1/blastest/check-blastest.sh000077500000000000000000000042141474157777200174060ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # script_name=${0##*/} ansi_red="\033[0;31m" ansi_green="\033[0;32m" ansi_normal="\033[0m" passmsg="All BLAS tests passed!" failmsg0="At least one BLAS test failed. :(" failmsg1="Please see out.* files for details." grep -q '\*\*\*\*' ./out.* if [ $? -eq 0 ]; then printf "${ansi_red}""${script_name}: ${failmsg0}""${ansi_normal}\n" printf "${ansi_red}""${script_name}: ${failmsg1}""${ansi_normal}\n" exit 1 else printf "${ansi_green}""${script_name}: ${passmsg}""${ansi_normal}\n" exit 0 fi blis-1.1/blastest/f2c/000077500000000000000000000000001474157777200146245ustar00rootroot00000000000000blis-1.1/blastest/f2c/abs.c000066400000000000000000000041311474157777200155340ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* Integer */ shortint h_abs(const shortint *x) { return ( shortint )( *x >= 0 ? (*x) : (- *x) ); //return ( shortint )abs( ( int )*x ); } integer i_abs(const integer *x) { return ( integer )( *x >= 0 ? (*x) : (- *x) ); //return ( integer )abs( ( int )*x ); } /* Double */ double r_abs(real *x) { return ( double )( *x >= 0 ? (*x) : (- *x) ); //return ( double )fabsf( ( float )*x ); } double d_abs(const doublereal *x) { return ( double )( *x >= 0 ? (*x) : (- *x) ); //return ( double )fabs( ( double )*x ); } /* Complex */ double c_abs(const complex *z) { return ( double )hypot(z->r, z->i); } double z_abs(const doublecomplex *z) { return ( double )hypot(z->r, z->i); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/acos.c000066400000000000000000000026441474157777200157230ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_acos(real *x) { return( acos(*x) ); } double d_acos(const doublereal *x) { return( acos(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/arith.h000066400000000000000000000042761474157777200161150ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #ifndef F2C_ARITH_H #define F2C_ARITH_H #include #include #ifdef _MSC_VER #define isnan _isnan #define isinf(x) (!_finite(x)) #endif #ifndef isnan # define isnan(x) \ (sizeof (x) == sizeof (long double) ? isnan_ld (x) \ : sizeof (x) == sizeof (double) ? isnan_d (x) \ : isnan_f (x)) static inline int isnan_f (float x) { return x != x; } static inline int isnan_d (double x) { return x != x; } static inline int isnan_ld (long double x) { return x != x; } #endif #ifndef isinf # define isinf(x) \ (sizeof (x) == sizeof (long double) ? isinf_ld (x) \ : sizeof (x) == sizeof (double) ? isinf_d (x) \ : isinf_f (x)) static inline int isinf_f (float x) { return !isnan (x) && isnan (x - x); } static inline int isinf_d (double x) { return !isnan (x) && isnan (x - x); } static inline int isinf_ld (long double x) { return !isnan (x) && isnan (x - x); } #endif #ifndef signbit #define signbit(x) (((x) < 0)? 1 : 0) #endif #endif blis-1.1/blastest/f2c/asin.c000066400000000000000000000026441474157777200157300ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_asin(real *x) { return( asin(*x) ); } double d_asin(const doublereal *x) { return( asin(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/atan.c000066400000000000000000000026441474157777200157210ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_atan(real *x) { return( atan(*x) ); } double d_atan(const doublereal *x) { return( atan(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/atn2.c000066400000000000000000000027121474157777200156360ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_atn2(real *x, real *y) { return( atan2(*x,*y) ); } double d_atn2(const doublereal *x, const doublereal *y) { return( atan2(*x,*y) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/close.c000066400000000000000000000046101474157777200160760ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" #undef abs #undef min #undef max #include #if defined(NON_UNIX_STDIO) || defined(_MSC_VER) || defined(__MINGW32__) # include # define unlink remove #else # include #endif integer f_clos(cllist *a) { unit *b; if(a->cunit >= MXUNIT) return(0); b= &f__units[a->cunit]; if(b->ufd==NULL) goto done; if (b->uscrtch == 1) goto Delete; if (!a->csta) goto Keep; switch(*a->csta) { default: Keep: case 'k': case 'K': if(b->uwrt == 1) t_runc((alist *)a); if(b->ufnm) { fclose(b->ufd); free(b->ufnm); } break; case 'd': case 'D': Delete: fclose(b->ufd); if(b->ufnm) { unlink(b->ufnm); /*SYSDEP*/ free(b->ufnm); } } b->ufd=NULL; done: b->uend=0; b->ufnm=NULL; return(0); } void f_exit(void) { static int run = 0; int i; static cllist xx; /* Do not execute f_exit() twice */ if (run) return; run = 1; if (!xx.cerr) { xx.cerr=1; xx.csta=NULL; for(i=0;ii; r->r = z->r; r->i = -zi; } void r_cnjg(complex *r, complex *z) { real zi = z->i; r->r = z->r; r->i = -zi; } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/cos.c000066400000000000000000000033611474157777200155570ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_cos(real *x) { return( cos(*x) ); } double d_cos(const doublereal *x) { return( cos(*x) ); } void c_cos(complex *r, complex *z) { double zi = z->i, zr = z->r; r->r = cos(zr) * cosh(zi); r->i = - sin(zr) * sinh(zi); } void z_cos(doublecomplex *r, doublecomplex *z) { double zi = z->i, zr = z->r; r->r = cos(zr) * cosh(zi); r->i = - sin(zr) * sinh(zi); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/cosh.c000066400000000000000000000026451474157777200157330ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_cosh(real *x) { return( cosh(*x) ); } double d_cosh(const doublereal *x) { return( cosh(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/dim.c000066400000000000000000000032611474157777200155430ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_dim(const shortint *a, const shortint *b) { return( *a > *b ? *a - *b : 0); } integer i_dim(const integer *a, const integer *b) { return( *a > *b ? *a - *b : 0); } double r_dim(real *a, real *b) { return( *a > *b ? *a - *b : 0); } double d_dim(const doublereal *a, const doublereal *b) { return( *a > *b ? *a - *b : 0); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/div.c000066400000000000000000000063631474157777200155620ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif void c_div(complex *c, complex *a, complex *b) { double ratio, den; double abr, abi, cr; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if(abi == 0) { #ifdef IEEE_COMPLEX_DIVIDE float af, bf; af = bf = abr; if (a->i != 0 || a->r != 0) af = 1.; c->i = c->r = af / bf; return; #else sig_die("complex division by zero", 1); #endif } ratio = (double)b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; c->i = (a->i*ratio - a->r) / den; } else { ratio = (double)b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; c->i = (a->i - a->r*ratio) / den; } c->r = cr; } void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) { double ratio, den; double abr, abi, cr; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if(abi == 0) { #ifdef IEEE_COMPLEX_DIVIDE if (a->i != 0 || a->r != 0) abi = 1.; c->i = c->r = abi / abr; return; #else sig_die("complex division by zero", 1); #endif } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; c->i = (a->i*ratio - a->r) / den; } else { ratio = b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; c->i = (a->i - a->r*ratio) / den; } c->r = cr; } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/dolio.c000066400000000000000000000025551474157777200161050ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" integer do_lio(ftnint *type, ftnint *number, char *ptr, ftnlen len) { return((*f__lioproc)(number,ptr,len,*type)); } blis-1.1/blastest/f2c/endfile.c000066400000000000000000000066221474157777200164040ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" #ifdef HAVE_FTRUNCATE #include #endif #undef abs #undef min #undef max #include "stdlib.h" #include "string.h" integer f_end(alist *a) { unit *b; FILE *tf; if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"endfile"); b = &f__units[a->aunit]; if(b->ufd==NULL) { char nbuf[10]; sprintf(nbuf,"fort.%ld",(long)a->aunit); if (tf = fopen(nbuf, f__w_mode[0])) fclose(tf); return(0); } b->uend=1; return(b->useek ? t_runc(a) : 0); } #if !defined(HAVE_FTRUNCATE) static int copy(FILE *from, register long len, FILE *to) { int len1; char buf[BUFSIZ]; while(fread(buf, len1 = len > BUFSIZ ? BUFSIZ : (int)len, 1, from)) { if (!fwrite(buf, len1, 1, to)) return 1; if ((len -= len1) <= 0) break; } return 0; } #endif /* !HAVE_FTRUNCATE */ int t_runc(alist *a) { OFF_T loc, len; unit *b; int rc; FILE *bf; #if !defined(HAVE_FTRUNCATE) FILE *tf; #endif b = &f__units[a->aunit]; if(b->url) return(0); /*don't truncate direct files*/ loc=FTELL(bf = b->ufd); FSEEK(bf,(OFF_T)0,SEEK_END); len=FTELL(bf); if (loc >= len || b->useek == 0) return(0); #ifndef HAVE_FTRUNCATE if (b->ufnm == NULL) return 0; rc = 0; fclose(b->ufd); if (!loc) { if (!(bf = fopen(b->ufnm, f__w_mode[b->ufmt]))) rc = 1; if (b->uwrt) b->uwrt = 1; goto done; } if (!(bf = fopen(b->ufnm, f__r_mode[0])) || !(tf = tmpfile())) { #ifdef NON_UNIX_STDIO bad: #endif rc = 1; goto done; } if (copy(bf, (long)loc, tf)) { bad1: rc = 1; goto done1; } if (!(bf = freopen(b->ufnm, f__w_mode[0], bf))) goto bad1; rewind(tf); if (copy(tf, (long)loc, bf)) goto bad1; b->uwrt = 1; b->urw = 2; #ifdef NON_UNIX_STDIO if (b->ufmt) { fclose(bf); if (!(bf = fopen(b->ufnm, f__w_mode[3]))) goto bad; FSEEK(bf,(OFF_T)0,SEEK_END); b->urw = 3; } #endif done1: fclose(tf); done: f__cf = b->ufd = bf; #else /* !HAVE_TRUNCATE */ if (b->urw & 2) fflush(b->ufd); /* necessary on some Linux systems */ rc = ftruncate(fileno(b->ufd), loc); /* The following FSEEK is unnecessary on some systems, */ /* but should be harmless. */ FSEEK(b->ufd, (OFF_T)0, SEEK_END); #endif /* HAVE_TRUNCATE */ if (rc) err(a->aerr,111,"endfile"); return 0; } blis-1.1/blastest/f2c/epsilon.c000066400000000000000000000026421474157777200164450ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #include "float.h" #ifdef __cplusplus extern "C" { #endif real s_epsilon_( real* x ) { return FLT_EPSILON; } doublereal d_epsilon_( doublereal* x ) { return DBL_EPSILON; } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/err.c000066400000000000000000000153711474157777200155670ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #if defined(_MSC_VER) || defined(__MINGW32__) # include # include #else # ifdef HAVE_ISATTY # include # else # define isatty(x) 0 # endif #endif #include "f2c.h" #include "fio.h" #include "fmt.h" /* for struct syl */ /*global definitions*/ unit f__units[MXUNIT]; /*unit table*/ flag f__init; /*0 on entry, 1 after initializations*/ cilist *f__elist; /*active external io list*/ icilist *f__svic; /*active internal io list*/ flag f__reading; /*1 if reading, 0 if writing*/ flag f__cplus,f__cblank; const char *f__fmtbuf; flag f__external; /*1 if external io, 0 if internal */ int (*f__getn)(void); /* for formatted input */ void (*f__putn)(int); /* for formatted output */ int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); int (*f__dorevert)(void),(*f__donewrec)(void),(*f__doend)(void); flag f__sequential; /*1 if sequential io, 0 if direct*/ flag f__formatted; /*1 if formatted io, 0 if unformatted*/ FILE *f__cf; /*current file*/ unit *f__curunit; /*current unit*/ int f__recpos; /*place in current record*/ OFF_T f__cursor, f__hiwater; int f__scale; char *f__icptr; /*error messages*/ const char *F_err[] = { "error in format", /* 100 */ "illegal unit number", /* 101 */ "formatted io not allowed", /* 102 */ "unformatted io not allowed", /* 103 */ "direct io not allowed", /* 104 */ "sequential io not allowed", /* 105 */ "can't backspace file", /* 106 */ "null file name", /* 107 */ "can't stat file", /* 108 */ "unit not connected", /* 109 */ "off end of record", /* 110 */ "truncation failed in endfile", /* 111 */ "incomprehensible list input", /* 112 */ "out of free space", /* 113 */ "unit not connected", /* 114 */ "read unexpected character", /* 115 */ "bad logical input field", /* 116 */ "bad variable type", /* 117 */ "bad namelist name", /* 118 */ "variable not in namelist", /* 119 */ "no end record", /* 120 */ "variable count incorrect", /* 121 */ "subscript for scalar variable", /* 122 */ "invalid array section", /* 123 */ "substring out of bounds", /* 124 */ "subscript out of bounds", /* 125 */ "can't read file", /* 126 */ "can't write file", /* 127 */ "'new' file exists", /* 128 */ "can't append to file", /* 129 */ "non-positive record number", /* 130 */ "nmLbuf overflow" /* 131 */ }; #define MAXERR (sizeof(F_err)/sizeof(char *)+100) #if defined(_MSC_VER) || defined(__MINGW32__) #undef isatty #define isatty _isatty #undef fileno #define fileno _fileno #endif int f__canseek(FILE *f) /*SYSDEP*/ { #ifdef NON_UNIX_STDIO return !isatty(fileno(f)); #else struct stat x; if (fstat(fileno(f),&x) < 0) return(0); #ifdef S_IFMT switch(x.st_mode & S_IFMT) { case S_IFDIR: case S_IFREG: if(x.st_nlink > 0) /* !pipe */ return(1); else return(0); case S_IFCHR: if(isatty(fileno(f))) return(0); return(1); #ifdef S_IFBLK case S_IFBLK: return(1); #endif } #else #ifdef S_ISDIR /* POSIX version */ if (S_ISREG(x.st_mode) || S_ISDIR(x.st_mode)) { if(x.st_nlink > 0) /* !pipe */ return(1); else return(0); } if (S_ISCHR(x.st_mode)) { if(isatty(fileno(f))) return(0); return(1); } if (S_ISBLK(x.st_mode)) return(1); #else Help! How does fstat work on this system? #endif #endif return(0); /* who knows what it is? */ #endif } void f__fatal(int n, const char *s) { if(n<100 && n>=0) perror(s); /*SYSDEP*/ else if(n >= (int)MAXERR || n < -1) { fprintf(stderr,"%s: illegal error number %d\n",s,n); } else if(n == -1) fprintf(stderr,"%s: end of file\n",s); else fprintf(stderr,"%s: %s\n",s,F_err[n-100]); if (f__curunit) { fprintf(stderr,"apparent state: unit %d ", (int)(f__curunit-f__units)); fprintf(stderr, f__curunit->ufnm ? "named %s\n" : "(unnamed)\n", f__curunit->ufnm); } else fprintf(stderr,"apparent state: internal I/O\n"); if (f__fmtbuf) fprintf(stderr,"last format: %s\n",f__fmtbuf); fprintf(stderr,"lately %s %s %s %s",f__reading?"reading":"writing", f__sequential?"sequential":"direct",f__formatted?"formatted":"unformatted", f__external?"external":"internal"); sig_die(" IO", 1); } void f_init(void) { unit *p; f__init=1; p= &f__units[0]; p->ufd=stderr; p->useek=f__canseek(stderr); p->ufmt=1; p->uwrt=1; p = &f__units[5]; p->ufd=stdin; p->useek=f__canseek(stdin); p->ufmt=1; p->uwrt=0; p= &f__units[6]; p->ufd=stdout; p->useek=f__canseek(stdout); p->ufmt=1; p->uwrt=1; } int f__nowreading(unit *x) { OFF_T loc; int ufmt, urw; if (x->urw & 1) goto done; if (!x->ufnm) goto cantread; ufmt = x->url ? 0 : x->ufmt; loc = FTELL(x->ufd); urw = 3; if (!freopen(x->ufnm, f__w_mode[ufmt|2], x->ufd)) { urw = 1; if(!freopen(x->ufnm, f__r_mode[ufmt], x->ufd)) { cantread: errno = 126; return 1; } } FSEEK(x->ufd,loc,SEEK_SET); x->urw = urw; done: x->uwrt = 0; return 0; } int f__nowwriting(unit *x) { OFF_T loc; int ufmt; if (x->urw & 2) { if (x->urw & 1) FSEEK(x->ufd, (OFF_T)0, SEEK_CUR); goto done; } if (!x->ufnm) goto cantwrite; ufmt = x->url ? 0 : x->ufmt; if (x->uwrt == 3) { /* just did write, rewind */ if (!(f__cf = x->ufd = freopen(x->ufnm,f__w_mode[ufmt],x->ufd))) goto cantwrite; x->urw = 2; } else { loc=FTELL(x->ufd); if (!(f__cf = x->ufd = freopen(x->ufnm, f__w_mode[ufmt | 2], x->ufd))) { x->ufd = NULL; cantwrite: errno = 127; return(1); } x->urw = 3; FSEEK(x->ufd,loc,SEEK_SET); } done: x->uwrt = 1; return 0; } int err__fl(int f, int m, const char *s) { if (!f) f__fatal(m, s); if (f__doend) (*f__doend)(); return errno = m; } blis-1.1/blastest/f2c/exit_.c000066400000000000000000000027631474157777200161100ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* This gives the effect of subroutine exit(rc) integer*4 rc stop end * with the added side effect of supplying rc as the program's exit code. */ #include #include "f2c.h" #ifdef __cplusplus extern "C" { #endif void exit_(integer *rc) { exit(*rc); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/exp.c000066400000000000000000000034051474157777200155660ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_exp(real *x) { return( exp(*x) ); } double d_exp(const doublereal *x) { return( exp(*x) ); } void c_exp(complex *r, complex *z) { double expx, zi = z->i; expx = exp(z->r); r->r = expx * cos(zi); r->i = expx * sin(zi); } void z_exp(doublecomplex *r, doublecomplex *z) { double expx, zi = z->i; expx = exp(z->r); r->r = expx * cos(zi); r->i = expx * sin(zi); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/f2c.h000066400000000000000000000265261474157777200154620ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* include/f2c.h. Generated from f2c.h.in by configure. */ /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE #include #include #ifdef _MSC_VER # include #else # include #endif #ifdef __cplusplus extern "C" { #endif #ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? (a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs /* #undef cray */ /* #undef gcos */ /* #undef mc68010 */ /* #undef mc68020 */ /* #undef mips */ /* #undef pdp11 */ /* #undef sgi */ /* #undef sparc */ /* #undef sun */ /* #undef sun2 */ /* #undef sun3 */ /* #undef sun4 */ /* #undef u370 */ /* #undef u3b */ /* #undef u3b2 */ /* #undef u3b5 */ /* #undef unix */ /* #undef vax */ #endif void libf2c_init(int argc, char **argv); void libf2c_close(); /************************************************************* * LIBF77 */ /* * Private functions and variables in libF77 */ extern int xargc; extern char **xargv; extern doublereal _0; double f__cabs(double, double); char *F77_aloc(integer Len, const char *whence); void sig_die(const char*, int); void _uninit_f2c(void *x, int type, long len); /* * Public functions in libF77 */ int abort_(void); void c_cos(complex *r, complex *z); void c_div(complex *c, complex *a, complex *b); void c_exp(complex *r, complex *z); void c_log(complex *r, complex *z); void c_sin(complex *r, complex *z); void c_sqrt(complex *r, complex *z); double dtime_(float *tarray); int ef1asc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb); integer ef1cmc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb); real etime_(real *tarray); int getarg_(ftnint *n, char *s, ftnlen ls); int getenv_(char *fname, char *value, ftnlen flen, ftnlen vlen); shortint h_indx(char *a, char *b, ftnlen la, ftnlen lb); integer i_indx(char *a, char *b, ftnlen la, ftnlen lb); logical l_ge(char *a, char *b, ftnlen la, ftnlen lb); logical l_gt(char *a, char *b, ftnlen la, ftnlen lb); logical l_le(char *a, char *b, ftnlen la, ftnlen lb); logical l_lt(char *a, char *b, ftnlen la, ftnlen lb); integer lbit_bits(integer a, integer b, integer len); integer lbit_shift(integer a, integer b); integer lbit_cshift(integer a, integer b, integer len); void pow_ci(complex *p, complex *a, integer *b); double pow_dd(doublereal *ap, doublereal *bp); double pow_di(doublereal *ap, integer *bp); shortint pow_hh(shortint *ap, shortint *bp); integer pow_ii(integer *ap, integer *bp); #ifdef INTEGER_STAR_8 longint pow_qq(longint *ap, longint *bp); #endif double pow_ri(real *ap, integer *bp); void pow_zi(doublecomplex*, doublecomplex*, integer*); void pow_zz(doublecomplex *r, doublecomplex *a, doublecomplex *b); #ifdef INTEGER_STAR_8 longint qbit_bits(longint a, integer b, integer len); longint qbit_cshift(longint a, integer b, integer len); longint qbit_shift(longint a, integer b); #endif double r_abs(real *x); double r_acos(real *x); double r_asin(real *x); double r_atan(real *x); double r_atn2(real *x, real *y); void r_cnjg(complex *r, complex *z); double r_cos(real *x); double r_cosh(real *x); double r_dim(real *a, real *b); double r_exp(real *x); double r_imag(complex *z); double r_int(real *x); double r_lg10(real *x); double r_log(real *x); double r_mod(real *x, real *y); double r_nint(real *x); double r_sign(real *a, real *b); double r_sin(real *x); double r_sinh(real *x); double r_sqrt(real *x); double r_tan(real *x); double r_tanh(real *x); int s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll); integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb); int s_paus(char *s, ftnlen n); integer s_rnge(char *varn, ftnint offset, char *procn, ftnint line); int s_stop(char *s, ftnlen n); ftnint signal_(integer *sigp, void *proc); integer system_(register char *s, ftnlen n); void z_div(doublecomplex*, doublecomplex*, doublecomplex*); void z_cos(doublecomplex *r, doublecomplex *z); void z_exp(doublecomplex *r, doublecomplex *z); void z_log(doublecomplex *r, doublecomplex *z); void z_sin(doublecomplex *r, doublecomplex *z); void z_sqrt(doublecomplex *r, doublecomplex *z); /* #ifndef F2C_NO_INLINE_H # if defined(__GNUC__) # include # endif #endif */ #if !defined(F2C_INLINE_H) double c_abs(const complex *z); double d_abs(const doublereal *x); double d_acos(const doublereal *x); double d_asin(const doublereal *x); double d_atan(const doublereal *x); double d_atn2(const doublereal *x, const doublereal *y); void d_cnjg(doublecomplex *r, const doublecomplex *z); double d_cos(const doublereal *x); double d_cosh(const doublereal *x); double d_dim(const doublereal *a, const doublereal *b); double d_exp(const doublereal *x); double d_imag(const doublecomplex *z); double d_int(const doublereal *x); double d_lg10(const doublereal *x); double d_log(const doublereal *x); double d_mod(const doublereal *x, const doublereal *y); double d_nint(const doublereal *x); double d_prod(const real *x, const real *y); double d_sign(const doublereal *a, const doublereal *b); double d_sin(const doublereal *x); double d_sinh(const doublereal *x); double d_sqrt(const doublereal *x); double d_tan(const doublereal *x); double d_tanh(const doublereal *x); double derf_(const doublereal *x); double derfc_(const doublereal *x); double erf_(const real *x); double erfc_(const real *x); shortint h_abs(const shortint *x); shortint h_dim(const shortint *a, const shortint *b); shortint h_dnnt(const doublereal *x); shortint h_len(const char *s, ftnlen n); shortint h_mod(const short *a, const short *b); shortint h_nint(const real *x); shortint h_sign(const shortint *a, const shortint *b); shortlogical hl_ge(const char *a, const char *b, ftnlen la, ftnlen lb); shortlogical hl_gt(const char *a, const char *b, ftnlen la, ftnlen lb); shortlogical hl_le(const char *a, const char *b, ftnlen la, ftnlen lb); shortlogical hl_lt(const char *a, const char *b, ftnlen la, ftnlen lb); integer i_abs(const integer *x); integer i_dceiling(const doublereal *x); integer i_dim(const integer *a, const integer *b); integer i_dnnt(const doublereal *x); integer i_len(const char *s, ftnlen n); integer i_len_trim(const char *s, ftnlen n); integer i_mod(const integer *a, const integer *b); integer i_nint(const real *x); integer i_sign(const integer *a, const integer *b); integer i_sceiling(const real *x); ftnint iargc_(void); int s_copy(char *a, const char *b, ftnlen la, ftnlen lb); double z_abs(const doublecomplex *z); #endif /* !F2C_INLINE_H */ /************************************************************* * LIBI77 * * Public functions */ int c_dfe(cilist *a); int c_due(cilist *a); int c_sfe(cilist *a); int c_sue(cilist *a); integer e_rdfe(void); integer e_rdue(void); integer e_rsfe(void); integer e_rsfi(void); integer e_rsle(void); integer e_rsli(void); integer e_rsue(void); integer e_wdfe(void); integer e_wdue(void); integer e_wsfi(void); integer e_wsfe(void); integer e_wsle(void); integer e_wsli(void); integer e_wsue(void); void exit_(integer *rc); integer f_back(alist *a); integer f_clos(cllist *a); integer f_end(alist *a); void f_exit(void); integer f_inqu(inlist *a); integer f_open(olist *a); integer f_rew(alist *a); int flush_(void); integer ftell_(integer *Unit); int fseek_(integer *Unit, integer *offset, integer *whence); #ifdef INTEGER_STAR_8 longint ftell64_(integer *Unit); int fseek64_(integer *Unit, longint *offset, integer *whence); #endif integer s_rdfe(cilist *a); integer s_rdue(cilist *a); integer s_rsfi(icilist *a); integer s_rsle(cilist *a); integer s_rsli(icilist *a); integer s_rsne(cilist *a); integer s_rsni(icilist *a); integer s_rsue(cilist *a); integer s_wdfe(cilist *a); integer s_wdue(cilist *a); integer s_wsfe(cilist *a); integer s_wsfi(icilist *a); integer s_wsle(cilist *a); integer s_wsli(icilist *a); integer s_wsne(cilist *a); integer s_wsni(icilist *a); integer s_wsue(cilist *a); real s_epsilon_( real* x ); double d_epsilon_( doublereal* x ); /* * Private functions in the F2C library */ extern const ftnlen f__typesize[]; #ifdef __cplusplus } #endif #endif blis-1.1/blastest/f2c/f2c_config.h000066400000000000000000000121211474157777200167710ustar00rootroot00000000000000/* config_aux/config.h. Generated from f2c_config.h.in by configure. */ /* config_aux/config.h.in. Generated from configure.ac by autoheader. */ /* Bit size of 'int' */ #define F2C_INT_BITS 32 /* Bit size of 'long' */ #define F2C_LONG_BITS 64 /* Bit sizze of long long */ #define F2C_LONG_LONG_BITS 64 /* Define to 1 if you have the `atexit' function. */ #define HAVE_ATEXIT 1 /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 /* Define to 1 if you have the header file. */ #define HAVE_FENV_H 1 /* Define to 1 if you have the `floor' function. */ /* #undef HAVE_FLOOR */ /* Define to 1 if you have the `fork' function. */ #define HAVE_FORK 1 /* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ #if !defined(_MSC_VER) #define HAVE_FSEEKO 1 #endif /* Define to 1 if you have the `ftruncate' function. */ #if !defined(_MSC_VER) #define HAVE_FTRUNCATE 1 #endif /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 /* Define to 1 if you have the `isascii' function. */ #define HAVE_ISASCII 1 /* Define to 1 if you have the `isatty' function. */ #define HAVE_ISATTY 1 /* Define to 1 if your system has a GNU libc compatible `malloc' function, and to 0 otherwise. */ #define HAVE_MALLOC 1 /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 /* Define to 1 if you have the `memset' function. */ #define HAVE_MEMSET 1 /* Define to 1 if you have the `mkdir' function. */ #define HAVE_MKDIR 1 /* Define to 1 if you have the `mkdtemp' function. */ #define HAVE_MKDTEMP 1 /* Define to 1 if you have the `mkstemp' function. */ #define HAVE_MKSTEMP 1 /* Define to 1 if you have the `onexit' function. */ /* #undef HAVE_ONEXIT */ /* Define to 1 if you have the `pow' function. */ /* #undef HAVE_POW */ /* Define to 1 if your system has a GNU libc compatible `realloc' function, and to 0 otherwise. */ #define HAVE_REALLOC 1 /* Define to 1 if you have the `rmdir' function. */ #define HAVE_RMDIR 1 /* Define to 1 if you have the `sqrt' function. */ /* #undef HAVE_SQRT */ /* Define to 1 if you have the header file. */ #define HAVE_STDDEF_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the `strchr' function. */ #define HAVE_STRCHR 1 /* Define to 1 if you have the header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if you have the `tmpfile' function. */ #define HAVE_TMPFILE 1 /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* Define to 1 if you have the `vfork' function. */ #define HAVE_VFORK 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_VFORK_H */ /* Define to 1 if `fork' works. */ #define HAVE_WORKING_FORK 1 /* Define to 1 if `vfork' works. */ #define HAVE_WORKING_VFORK 1 /* Define to the sub-directory where libtool stores uninstalled libraries. */ #define LT_OBJDIR ".libs/" /* Name of package */ #define PACKAGE "f2c" /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "jjgarcia@users.sourceforge.net" /* Define to the full name of this package. */ #define PACKAGE_NAME "F2C Fortran to C99 compiler" /* Define to the full name and version of this package. */ #define PACKAGE_STRING "F2C Fortran to C99 compiler 12.02.01" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "f2c" /* Define to the home page for this package. */ #define PACKAGE_URL "" /* Define to the version of this package. */ #define PACKAGE_VERSION "12.02.01" /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Version number of package */ #define VERSION "12.02.01" /* Enable large inode numbers on Mac OS X 10.5. */ #ifndef _DARWIN_USE_64_BIT_INODE # define _DARWIN_USE_64_BIT_INODE 1 #endif /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ /* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ /* #undef _LARGEFILE_SOURCE */ /* Define for large files, on AIX-style hosts. */ /* #undef _LARGE_FILES */ /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus /* #undef inline */ #endif /* Define to rpl_malloc if the replacement function should be used. */ /* #undef malloc */ /* Define to `int' if does not define. */ /* #undef pid_t */ /* Define to rpl_realloc if the replacement function should be used. */ /* #undef realloc */ /* Define to `unsigned int' if does not define. */ /* #undef size_t */ /* Define as `fork' if `vfork' does not work. */ /* #undef vfork */ #ifdef _MSC_VER #define NON_UNIX_STDIO 1 #endifblis-1.1/blastest/f2c/f2c_inline.h000066400000000000000000000135271474157777200170150ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* f2c_inline.h -- Standard Fortran to C header file */ #ifndef F2C_INLINE_H #define F2C_INLINE_H #ifndef F2C_INCLUDE #error f2c_include.h cannot be included as is #endif static inline double c_abs(const complex *z) { return hypot(z->r, z->i); } static inline double d_abs(const double *x) { return fabs(*x); } static inline double d_acos(const double *x) { return acos(*x); } static inline double d_acosh(const double *x) { return acosh(*x); } static inline double d_asin(const double *x) { return asin(*x); } static inline double d_asinh(const double *x) { return asinh(*x); } static inline double d_atan(const double *x) { return atan(*x); } static inline double d_atanh(const double *x) { return atanh(*x); } static inline double d_atn2(const double *x, double *y) { return atan2(*x, *y); } static inline void d_cnjg(doublecomplex *r, const doublecomplex *z) { r->r = z->r; r->i = -z->i; } static inline double d_cos(const double *x) { return cos(*x); } static inline double d_cosh(const double *x) { return cosh(*x); } static inline double d_dim(const double *a, double *b) { double d = (*a - *b); return (d > 0)? d : 0; } static inline double d_exp(const double *x) { return exp(*x); } static inline double d_imag(doublecomplex *x) { return x->i; } static inline double d_int(const double *x) { double y = *x; return (y < 0)? floor(y) : -floor(-y); } static inline double d_lg10(const double *x) { return log10(*x); } static inline double d_log(const double *x) { return log(*x); } static inline double d_nint(const double *x) { return round(*x); } static inline double d_prod(const float *x, const float *y) { return ((double)*x) * ((double)*x); } static inline double d_sin(const double *x) { return sin(*x); } static inline double d_tan(const double *x) { return tan(*x); } static inline double d_sinh(const double *x) { return sinh(*x); } static inline double d_sqrt(const double *x) { return sqrt(*x); } static inline double d_tanh(const double *x) { return tanh(*x); } static inline double d_sign(const double *a, const double *b) { double x = fabs(*a); return (*b >= 0 ? x : -x); } static inline double derfc_(const double *x) { return erfc(*x); } static inline double derf_(const double *x) { return erf(*x); } static inline double erf_(const float *x) { return erf((double)(*x)); } static inline double erfc_(const float *x) { return erfc((double)(*x)); } static inline shortint h_abs(const shortint *x) { return abs(*x); } static inline shortint h_dim(const shortint *a, const shortint *b) { shortint d = (*a - *b); return (d > 0)? d : 0; } static inline shortint h_len(const char *s, ftnlen n) { return n; } static inline shortint h_mod(const shortint *a, const shortint *b) { return *a % *b; } static inline shortint h_nint(const float *x) { return (shortint)round(*x); } static inline shortint h_dnnt(const doublereal *x) { return (shortint)round(*x); } static inline shortint h_sign(const shortint *a, const shortint *b) { shortint x = abs(*a); return *b >= 0 ? x : -x; } static inline shortlogical hl_ge(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) >= 0; } static inline shortlogical hl_le(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) >= 0; } static inline shortlogical hl_gt(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) > 0; } static inline shortlogical hl_lt(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) < 0; } static inline integer i_abs(const integer *x) { return abs(*x); } static inline integer i_dim(const integer *a, const integer *b) { integer d = (*a - *b); return (d > 0)? d : 0; } static inline integer i_len(const char *s, ftnlen n) { return n; } static inline integer i_mod(const integer *a, const integer *b) { return *a % *b; } static inline integer i_nint(const float *x) { return (integer)round(*x); } static inline integer i_dnnt(const doublereal *x) { return (integer)round(*x); } static inline integer i_sign(const integer *a, const integer *b) { integer x = abs(*a); return *b >= 0 ? x : -x; } static inline ftnint iargc_(void) { return xargc - 1; } static inline double z_abs(const doublecomplex *z) { return hypot(z->r, z->i); } static int s_copy(char *a, const char *b, ftnlen la, ftnlen lb) { if (la <= lb) { memmove(a, b, la); } else { memmove(a, b, lb); memset(a, ' ', la - lb); } return 0; } static inline integer i_sceiling(const real *r) { real x = *r; return ((integer)(x) + ((x) > 0 && (x) != (integer)(x))); } static inline integer i_dceiling(const doublereal *r) { doublereal x = *r; return ((integer)(x) + ((x) > 0 && (x) != (integer)(x))); } #endif /* !F2C_INLINE_H */ blis-1.1/blastest/f2c/f2c_types.h000066400000000000000000000104511474157777200166740ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* include/f2c_types.h. Generated from f2c_types.h.in by configure. */ /* include/f2c.h. Generated from f2c.h.in by configure. */ /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_TYPES_H #define F2C_TYPES_H #ifdef HAVE_BLIS_H #include #define BLIS_VIA_BLASTEST #include "blis.h" #endif #ifdef __cplusplus extern "C" { #endif /* Define to the number of bits in an integer */ #define F2C_INT_BITS 32 /* Define to the number of bits in a long integer */ #define F2C_LONG_BITS 64 /* Define to the number of bits in a long long integer, if it exists */ #define F2C_LONG_LONG_BITS 64 #ifdef HAVE_BLIS_H #if BLIS_BLAS_INT_TYPE_SIZE == 32 typedef int32_t integer; #elif BLIS_BLAS_INT_TYPE_SIZE == 64 typedef int64_t integer; #else typedef long int integer; #endif //typedef int integer; typedef unsigned int uinteger; #endif #if F2C_INT_BITS == 32 # if F2C_LONG_BITS == 64 typedef long int longint; typedef unsigned long int ulongint; # define INTEGER_STAR_8 # elif defined(F2C_LONG_LONG_BITS) # if F2C_LONG_LONG_BITS == 64 typedef long long int longint; typedef unsigned long long int ulongint; # define INTEGER_STAR_8 # endif # endif #endif typedef char integer1; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef integer logical; typedef shortint shortlogical; typedef integer1 logical1; #ifdef f2c_i2 /* for -i2 */ typedef short flag; #ifndef HAVE_BLIS_H // don't re-typedef ftnlen typedef short ftnlen; #endif typedef short ftnint; #else typedef integer flag; #ifndef HAVE_BLIS_H // don't re-typedef ftnlen typedef integer ftnlen; #endif typedef integer ftnint; #endif /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ void (*C_fp)(...); typedef /* Double Complex */ void (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ void (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ void (*C_fp)(); typedef /* Double Complex */ void (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ void (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef void C_f; /* complex function */ typedef void H_f; /* character function */ typedef void Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ #ifdef __cplusplus } #endif #endif /* F2C_TYPES_H */ blis-1.1/blastest/f2c/f2c_types_win.h000066400000000000000000000044701474157777200175550ustar00rootroot00000000000000/* include/f2c.h. Generated from f2c.h.in by configure. */ /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_TYPES_WIN_H #define F2C_TYPES_WIN_H #ifdef __cplusplus extern "C" { #endif /* Define to the number of bits in an integer */ #define F2C_INT_BITS 32 /* Define to the number of bits in a long integer */ #define F2C_LONG_BITS 64 typedef int integer; typedef unsigned int uinteger; typedef __int64 longint; typedef unsigned __int64 ulongint; /*#define INTEGER_STAR_8*/ typedef char integer1; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef integer logical; typedef shortint shortlogical; typedef integer1 logical1; #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef integer flag; typedef integer ftnlen; typedef integer ftnint; #endif /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ void (*C_fp)(...); typedef /* Double Complex */ void (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ void (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ void (*C_fp)(); typedef /* Double Complex */ void (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ void (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef void C_f; /* complex function */ typedef void H_f; /* character function */ typedef void Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ #ifdef __cplusplus } #endif #endif /* F2C_TYPES_H */ blis-1.1/blastest/f2c/fio.h000066400000000000000000000114521474157777200155550ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include #include #include #include #ifdef HAVE_FSEEKO #define OFF_T off_t #define FSEEK fseeko #define FTELL ftello #else #define OFF_T long #define FSEEK fseek #define FTELL ftell #endif #ifdef MSDOS #ifndef NON_UNIX_STDIO #define NON_UNIX_STDIO #endif #endif typedef long uiolen; /*units*/ typedef struct { FILE *ufd; /*0=unconnected*/ char *ufnm; #ifndef MSDOS long uinode; int udev; #endif int url; /*0=sequential*/ flag useek; /*true=can backspace, use dir, ...*/ flag ufmt; flag urw; /* (1 for can read) | (2 for can write) */ flag ublnk; flag uend; flag uwrt; /*last io was write*/ flag uscrtch; } unit; extern int (*f__getn)(void); /* for formatted input */ extern void (*f__putn)(int); /* for formatted output */ extern void x_putc(int); extern long f__inode(char*,int*); extern void sig_die(const char*,int); extern void f__fatal(int, const char*); extern int t_runc(alist*); extern int f__nowreading(unit*), f__nowwriting(unit*); extern int fk_open(int,int,ftnint); extern int en_fio(void); extern void f_init(void); extern int (*f__donewrec)(void), t_putc(int), x_wSL(void); extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*); extern int c_sfe(cilist*); extern int z_rnew(void); extern int err__fl(int,int,const char*); extern int xrd_SL(void); extern int f__putbuf(int); extern int f__canseek(FILE *f); extern int z_getc(void); extern void z_putc(int c); extern integer f_open(olist *a); #ifdef INTEGER_STAR_8 extern char *f__icvt(longint value, int *ndigit, int *sign, int base); #else extern char *f__icvt(integer value, int *ndigit, int *sign, int base); #endif extern int t_getc(void); extern flag f__init; extern cilist *f__elist; /*active external io list*/ extern flag f__reading,f__external,f__sequential,f__formatted; extern int (*f__doend)(void); extern FILE *f__cf; /*current file*/ extern unit *f__curunit; /*current unit*/ extern unit f__units[]; extern char *f__icptr; extern char *f__icend; extern icilist *f__svic; extern int f__icnum; #define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);} #define errfl(f,m,s) return err__fl((int)f,m,s) /*Table sizes*/ #define MXUNIT 100 extern int f__recpos; /*position in current record*/ extern OFF_T f__cursor; /* offset to move to */ extern OFF_T f__hiwater; /* so TL doesn't confuse us */ #define WRITE 1 #define READ 2 #define SEQ 3 #define DIR 4 #define FMT 5 #define UNF 6 #define EXT 7 #define INT 8 #define buf_end(x) (x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ) extern const char *f__fmtbuf; extern const char *f__r_mode[2]; extern const char *f__w_mode[]; extern int l_eof; extern int c_le(cilist *a); extern int l_read(ftnint *number, char *ptr, ftnlen len, ftnint type); extern int l_write(ftnint *number, char *ptr, ftnlen len, ftnint type); extern flag f__lquit; extern int f__lcount; extern char *f__icptr; extern char *f__icend; extern icilist *f__svic; extern int f__icnum, f__recpos; extern int f__Aquote; extern int x_rsne(cilist*); extern void x_wsne(cilist *a); extern flag f__lquit; extern int f__lcount, nml_read; extern int t_getc(void); extern uiolen f__reclen; extern ftnint L_len; extern int f__scale; extern int (*l_getc)(void); extern int (*l_ungetc)(int,FILE*); extern int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); int do_us(ftnint *number, char *ptr, ftnlen len); integer do_ud(ftnint *number, char *ptr, ftnlen len); integer do_uio(ftnint *number, char *ptr, ftnlen len); integer do_fio(ftnint *number, char *ptr, ftnlen len); int en_fio(void); extern int x_wSL(void); extern int x_getc(void); extern int x_endp(void); blis-1.1/blastest/f2c/fmt.c000066400000000000000000000214211474157777200155560ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" #include "fmt.h" #define skip(s) while(*s==' ') s++ #ifdef interdata #define SYLMX 300 #endif #ifdef pdp11 #define SYLMX 300 #endif #ifdef vax #define SYLMX 300 #endif #ifndef SYLMX #define SYLMX 300 #endif #define GLITCH '\2' /* special quote character for stu */ extern flag f__cblank,f__cplus; /*blanks in I and compulsory plus*/ static struct syl f__syl[SYLMX]; int f__parenlvl,f__pc,f__revloc; static const char *ap_end(const char *s) { char quote; quote= *s++; for(;*s;s++) { if(*s!=quote) continue; if(*++s!=quote) return(s); } if(f__elist->cierr) { errno = 100; return(NULL); } f__fatal(100, "bad string"); /*NOTREACHED*/ return 0; } static int op_gen(int a, int b, int c, int d) { struct syl *p= &f__syl[f__pc]; if(f__pc>=SYLMX) { fprintf(stderr,"format too complicated:\n"); sig_die(f__fmtbuf, 1); } p->op=a; p->p1=b; p->p2.i[0]=c; p->p2.i[1]=d; return(f__pc++); } static const char *f_list(const char*); static const char *gt_num(const char *s, int *n, int n1) { int m=0,f__cnt=0; char c; for(c= *s;;c = *s) { if(c==' ') { s++; continue; } if(c>'9' || c<'0') break; m=10*m+c-'0'; f__cnt++; s++; } if(f__cnt==0) { if (!n1) s = 0; *n=n1; } else *n=m; return(s); } static const char *f_s(const char *s, int curloc) { skip(s); if(*s++!='(') { return(NULL); } if(f__parenlvl++ ==1) f__revloc=curloc; if(op_gen(RET1,curloc,0,0)<0 || (s=f_list(s))==NULL) { return(NULL); } skip(s); return(s); } static int ne_d(const char *s, const char **p) { int n,x,sign=0; struct syl *sp; switch(*s) { default: return(0); case ':': (void) op_gen(COLON,0,0,0); break; case '$': (void) op_gen(NONL, 0, 0, 0); break; case 'B': case 'b': if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0); else (void) op_gen(BN,0,0,0); break; case 'S': case 's': if(*(s+1)=='s' || *(s+1) == 'S') { x=SS; s++; } else if(*(s+1)=='p' || *(s+1) == 'P') { x=SP; s++; } else x=S; (void) op_gen(x,0,0,0); break; case '/': (void) op_gen(SLASH,0,0,0); break; case '-': sign=1; case '+': s++; /*OUTRAGEOUS CODING TRICK*/ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (!(s=gt_num(s,&n,0))) { bad: *p = 0; return 1; } switch(*s) { default: return(0); case 'P': case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break; case 'X': case 'x': (void) op_gen(X,n,0,0); break; case 'H': case 'h': sp = &f__syl[op_gen(H,n,0,0)]; sp->p2.s = (char*)s + 1; s+=n; break; } break; case GLITCH: case '"': case '\'': sp = &f__syl[op_gen(APOS,0,0,0)]; sp->p2.s = (char*)s; if((*p = ap_end(s)) == NULL) return(0); return(1); case 'T': case 't': if(*(s+1)=='l' || *(s+1) == 'L') { x=TL; s++; } else if(*(s+1)=='r'|| *(s+1) == 'R') { x=TR; s++; } else x=T; if (!(s=gt_num(s+1,&n,0))) goto bad; s--; (void) op_gen(x,n,0,0); break; case 'X': case 'x': (void) op_gen(X,1,0,0); break; case 'P': case 'p': (void) op_gen(P,1,0,0); break; } s++; *p=s; return(1); } static int e_d(const char *s, const char **p) { int i,im,n,w,d,e,found=0,x=0; const char *sv=s; s=gt_num(s,&n,1); (void) op_gen(STACK,n,0,0); switch(*s++) { default: break; case 'E': case 'e': x=1; case 'G': case 'g': found=1; if (!(s=gt_num(s,&w,0))) { bad: *p = 0; return 1; } if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; if(*s!='E' && *s != 'e') (void) op_gen(x==1?E:G,w,d,0); /* default is Ew.dE2 */ else { if (!(s=gt_num(s+1,&e,0))) goto bad; (void) op_gen(x==1?EE:GE,w,d,e); } break; case 'O': case 'o': i = O; im = OM; goto finish_I; case 'Z': case 'z': i = Z; im = ZM; goto finish_I; case 'L': case 'l': found=1; if (!(s=gt_num(s,&w,0))) goto bad; if(w==0) break; (void) op_gen(L,w,0,0); break; case 'A': case 'a': found=1; skip(s); if(*s>='0' && *s<='9') { s=gt_num(s,&w,1); if(w==0) break; (void) op_gen(AW,w,0,0); break; } (void) op_gen(A,0,0,0); break; case 'F': case 'f': if (!(s=gt_num(s,&w,0))) goto bad; found=1; if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; (void) op_gen(F,w,d,0); break; case 'D': case 'd': found=1; if (!(s=gt_num(s,&w,0))) goto bad; if(w==0) break; if(*s=='.') { if (!(s=gt_num(s+1,&d,0))) goto bad; } else d=0; (void) op_gen(D,w,d,0); break; case 'I': case 'i': i = I; im = IM; finish_I: if (!(s=gt_num(s,&w,0))) goto bad; found=1; if(w==0) break; if(*s!='.') { (void) op_gen(i,w,0,0); break; } if (!(s=gt_num(s+1,&d,0))) goto bad; (void) op_gen(im,w,d,0); break; } if(found==0) { f__pc--; /*unSTACK*/ *p=sv; return(0); } *p=s; return(1); } static const char *i_tem(const char *s) { const char *t; int n,curloc; if(*s==')') return(s); if(ne_d(s,&t)) return(t); if(e_d(s,&t)) return(t); s=gt_num(s,&n,1); if((curloc=op_gen(STACK,n,0,0))<0) return(NULL); return(f_s(s,curloc)); } static const char *f_list(const char *s) { for(;*s!=0;) { skip(s); if((s=i_tem(s))==NULL) return(NULL); skip(s); if(*s==',') s++; else if(*s==')') { if(--f__parenlvl==0) { (void) op_gen(REVERT,f__revloc,0,0); return(++s); } (void) op_gen(GOTO,0,0,0); return(++s); } } return(NULL); } int pars_f(const char *s) { f__parenlvl=f__revloc=f__pc=0; if(f_s(s,0) == NULL) { return(-1); } return(0); } #define STKSZ 10 int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp; flag f__workdone, f__nonl; static int type_f(int n) { switch(n) { default: return(n); case RET1: return(RET1); case REVERT: return(REVERT); case GOTO: return(GOTO); case STACK: return(STACK); case X: case SLASH: case APOS: case H: case T: case TL: case TR: return(NED); case F: case I: case IM: case A: case AW: case O: case OM: case L: case E: case EE: case D: case G: case GE: case Z: case ZM: return(ED); } } integer do_fio(ftnint *number, char *ptr, ftnlen len) { struct syl *p; int n,i; for(i=0;i<*number;i++,ptr+=len) { loop: switch(type_f((p= &f__syl[f__pc])->op)) { default: fprintf(stderr,"unknown code in do_fio: %d\n%s\n", p->op,f__fmtbuf); err(f__elist->cierr,100,"do_fio"); case NED: if((*f__doned)(p)) { f__pc++; goto loop; } f__pc++; continue; case ED: if(f__cnt[f__cp]<=0) { f__cp--; f__pc++; goto loop; } if(ptr==NULL) return((*f__doend)()); f__cnt[f__cp]--; f__workdone=1; if((n=(*f__doed)(p,ptr,len))>0) errfl(f__elist->cierr,errno,"fmt"); if(n<0) err(f__elist->ciend,(EOF),"fmt"); continue; case STACK: f__cnt[++f__cp]=p->p1; f__pc++; goto loop; case RET1: f__ret[++f__rp]=p->p1; f__pc++; goto loop; case GOTO: if(--f__cnt[f__cp]<=0) { f__cp--; f__rp--; f__pc++; goto loop; } f__pc=1+f__ret[f__rp--]; goto loop; case REVERT: f__rp=f__cp=0; f__pc = p->p1; if(ptr==NULL) return((*f__doend)()); if(!f__workdone) return(0); if((n=(*f__dorevert)()) != 0) return(n); goto loop; case COLON: if(ptr==NULL) return((*f__doend)()); f__pc++; goto loop; case NONL: f__nonl = 1; f__pc++; goto loop; case S: case SS: f__cplus=0; f__pc++; goto loop; case SP: f__cplus = 1; f__pc++; goto loop; case P: f__scale=p->p1; f__pc++; goto loop; case BN: f__cblank=0; f__pc++; goto loop; case BZ: f__cblank=1; f__pc++; goto loop; } } return(0); } int en_fio(void) { ftnint one=1; return(do_fio(&one,(char *)NULL,(ftnint)0)); } void fmt_bg(void) { f__workdone=f__cp=f__rp=f__pc=f__cursor=0; f__cnt[0]=f__ret[0]=0; } blis-1.1/blastest/f2c/fmt.h000066400000000000000000000054631474157777200155730ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ struct syl { int op; int p1; union { int i[2]; char *s;} p2; }; #define RET1 1 #define REVERT 2 #define GOTO 3 #define X 4 #define SLASH 5 #define STACK 6 #define I 7 #define ED 8 #define NED 9 #define IM 10 #define APOS 11 #define H 12 #define TL 13 #define TR 14 #define T 15 #define COLON 16 #define S 17 #define SP 18 #define SS 19 #define P 20 #define BN 21 #define BZ 22 #define F 23 #define E 24 #define EE 25 #define D 26 #define G 27 #define GE 28 #define L 29 #define A 30 #define AW 31 #define O 32 #define NONL 33 #define OM 34 #define Z 35 #define ZM 36 typedef union { real pf; doublereal pd; } ufloat; typedef union { short is; signed char ic; integer il; #ifdef Allow_TYQUAD longint ili; #endif } Uint; #ifdef __cplusplus extern "C" { #define Cextern extern "C" #else #define Cextern extern #endif /* __cplusplus */ extern const char *f__fmtbuf; extern int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); extern int (*f__dorevert)(void); extern void fmt_bg(void); extern int pars_f(const char*); extern int rd_ed(struct syl*, char*, ftnlen),rd_ned(struct syl*); extern int w_ed(struct syl*, char*, ftnlen),w_ned(struct syl*); extern int wrt_E(ufloat*, int, int, int, ftnlen); extern int wrt_F(ufloat*, int, int, ftnlen); extern int wrt_L(Uint*, int, ftnlen); extern int f__pc,f__parenlvl,f__revloc; extern flag f__cblank,f__cplus,f__workdone, f__nonl; extern int f__scale; #ifdef __cplusplus } #endif #define GET(x) if((x=(*f__getn)())<0) return(x) #define VAL(x) (x!='\n'?x:' ') #define PUT(x) (*f__putn)(x) #undef TYQUAD #ifndef Allow_TYQUAD #undef longint #define longint long #else #define TYQUAD 14 #endif blis-1.1/blastest/f2c/fmtlib.c000066400000000000000000000037271474157777200162560ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include /* @(#)fmtlib.c 1.2 */ #define MAXINTLENGTH 23 #include "f2c.h" #ifndef Allow_TYQUAD #undef longint #define longint long #undef ulongint #define ulongint unsigned long #endif #ifdef INTEGER_STAR_8 char *f__icvt(longint value, int *ndigit, int *sign, int base) #else char *f__icvt(integer value, int *ndigit, int *sign, int base) #endif { static char buf[MAXINTLENGTH+1]; register int i; ulongint uvalue; if(value > 0) { uvalue = value; *sign = 0; } else if (value < 0) { uvalue = -value; *sign = 1; } else { *sign = 0; *ndigit = 1; buf[MAXINTLENGTH-1] = '0'; return &buf[MAXINTLENGTH-1]; } i = MAXINTLENGTH; do { buf[--i] = (uvalue%base) + '0'; uvalue /= base; } while(uvalue > 0); *ndigit = MAXINTLENGTH - i; return &buf[i]; } blis-1.1/blastest/f2c/fp.h000066400000000000000000000035251474157777200154070ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #define FMAX 40 #define EXPMAXDIGS 8 #define EXPMAX 99999999 /* FMAX = max number of nonzero digits passed to atof() */ /* EXPMAX = 10^EXPMAXDIGS - 1 = largest allowed exponent absolute value */ #ifdef V10 /* Research Tenth-Edition Unix */ #include "local.h" #endif /* MAXFRACDIGS and MAXINTDIGS are for wrt_F -- bounds (not necessarily tight) on the maximum number of digits to the right and left of * the decimal point. */ #ifdef VAX #define MAXFRACDIGS 56 #define MAXINTDIGS 38 #else #ifdef CRAY #define MAXFRACDIGS 9880 #define MAXINTDIGS 9864 #else /* values that suffice for IEEE double */ #define MAXFRACDIGS 344 #define MAXINTDIGS 308 #endif #endif blis-1.1/blastest/f2c/h_dnnt.c000066400000000000000000000025421474157777200162450ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_dnnt(const doublereal *x) { return (shortint)round(*x); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/hl_cmp.c000066400000000000000000000035201474157777200162320ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif extern integer s_cmp(const char *, const char *, ftnlen, ftnlen); shortlogical hl_ge(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) >= 0); } shortlogical hl_gt(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) > 0); } shortlogical hl_le(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) <= 0); } shortlogical hl_lt(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) < 0); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/i_dnnt.c000066400000000000000000000025401474157777200162440ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif integer i_dnnt(const doublereal *x) { return (integer)round(*x); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/i_len.c000066400000000000000000000025151474157777200160610ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif integer i_len(const char *s, ftnlen n) { return(n); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/imag.c000066400000000000000000000025671474157777200157170ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_imag(complex *z) { return z->i; } double d_imag(const doublecomplex *z) { return z->i; } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/int.c000066400000000000000000000027251474157777200155700ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double d_int(const doublereal *x) { return( (*x>0) ? floor(*x) : -floor(- *x) ); } double r_int(real *x) { return( (*x>0) ? floor(*x) : -floor(- *x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/l_cmp.c000066400000000000000000000034221474157777200160630ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif extern integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb); logical l_ge(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) >= 0); } logical l_gt(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) > 0); } logical l_le(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) <= 0); } logical l_lt(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) < 0); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/lg10.c000066400000000000000000000026471474157777200155440ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_lg10(real *x) { return( log10(*x) ); } double d_lg10(const doublereal *x) { return( log10(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/lio.h000066400000000000000000000040501474157777200155570ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* copy of ftypes from the compiler */ /* variable types * numeric assumptions: * int < reals < complexes * TYDREAL-TYREAL = TYDCOMPLEX-TYCOMPLEX */ /* 0-10 retain their old (pre LOGICAL*1, etc.) */ /* values to allow mixing old and new objects. */ #define TYUNKNOWN 0 #define TYADDR 1 #define TYSHORT 2 #define TYLONG 3 #define TYREAL 4 #define TYDREAL 5 #define TYCOMPLEX 6 #define TYDCOMPLEX 7 #define TYLOGICAL 8 #define TYCHAR 9 #define TYSUBR 10 #define TYINT1 11 #define TYLOGICAL1 12 #define TYLOGICAL2 13 #ifdef Allow_TYQUAD #undef TYQUAD #define TYQUAD 14 #endif #define LINTW 24 #define LINE 80 #define LLOGW 2 #define LGFMT "%.9G" /* LEFBL 20 should suffice; 24 overcomes a NeXT bug. */ #define LEFBL 24 typedef union { char flchar; short flshort; ftnint flint; #ifdef Allow_TYQUAD longint fllongint; #endif real flreal; doublereal fldouble; } flex; blis-1.1/blastest/f2c/log.c000066400000000000000000000033431474157777200155540ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_log(real *x) { return( log(*x) ); } double d_log(const doublereal *x) { return( log(*x) ); } void c_log(complex *r, complex *z) { double zi, zr; r->i = atan2(zi = z->i, zr = z->r); r->r = log( hypot(zr, zi) ); } void z_log(doublecomplex *r, doublecomplex *z) { double zi, zr; r->i = atan2(zi = z->i, zr = z->r); r->r = log( hypot(zr, zi) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/lread.c000066400000000000000000000351371474157777200160700ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include "f2c.h" #include "fio.h" /* Compile with -DF8X_NML_ELIDE_QUOTES to permit eliding quotation */ /* marks in namelist input a la the Fortran 8X Draft published in */ /* the May 1989 issue of Fortran Forum. */ #ifdef Allow_TYQUAD static longint f__llx; #endif #undef abs #undef min #undef max #include #include "fmt.h" #include "lio.h" #include "fp.h" int l_eof; int (*l_getc)(void); int (*l_ungetc)(int,FILE*); int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); #define isblnk(x) (f__ltab[x+1]&B) #define issep(x) (f__ltab[x+1]&SX) #define isapos(x) (f__ltab[x+1]&AX) #define isexp(x) (f__ltab[x+1]&EX) #define issign(x) (f__ltab[x+1]&SG) #define iswhit(x) (f__ltab[x+1]&WH) #define SX 1 #define B 2 #define AX 4 #define EX 8 #define SG 16 #define WH 32 static char f__ltab[128+1] = { /* offset one for EOF */ 0, 0,0,AX,0,0,0,0,0,0,WH|B,SX|WH,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, SX|B|WH,0,AX,0,0,0,0,AX,0,0,0,SG,SX,SG,0,SX, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, AX,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #ifdef ungetc static int un_getc(int x, FILE *f__cf) { return ungetc(x,f__cf); } #else #define un_getc ungetc #endif int t_getc(void) { int ch; if(f__curunit->uend) return(EOF); if((ch=getc(f__cf))!=EOF) return(ch); if(feof(f__cf)) f__curunit->uend = l_eof = 1; return(EOF); } integer e_rsle(void) { int ch; if(f__curunit->uend) return(0); while((ch=t_getc())!='\n') if (ch == EOF) { if(feof(f__cf)) f__curunit->uend = l_eof = 1; return EOF; } return(0); } flag f__lquit; int f__lcount,f__ltype,nml_read; char *f__lchar; double f__lx,f__ly; #define ERR(x) if(n=(x)) return(n) #define GETC(x) (x=(*l_getc)()) #define Ungetc(x,y) (*l_ungetc)(x,y) static int l_R(int poststar, int reqint) { char s[FMAX+EXPMAXDIGS+4]; register int ch; register char *sp, *spe, *sp1; long e, exp; int havenum, havestar, se; if (!poststar) { if (f__lcount > 0) return(0); f__lcount = 1; } #ifdef Allow_TYQUAD f__llx = 0; #endif f__ltype = 0; exp = 0; havestar = 0; retry: sp1 = sp = s; spe = sp + FMAX; havenum = 0; switch(GETC(ch)) { case '-': *sp++ = ch; sp1++; spe++; case '+': GETC(ch); } while(ch == '0') { ++havenum; GETC(ch); } while(isdigit(ch)) { if (sp < spe) *sp++ = ch; else ++exp; GETC(ch); } if (ch == '*' && !poststar) { if (sp == sp1 || exp || *s == '-') { errfl(f__elist->cierr,112,"bad repetition count"); } poststar = havestar = 1; *sp = 0; f__lcount = atoi(s); goto retry; } if (ch == '.') { #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT if (reqint) errfl(f__elist->cierr,115,"invalid integer"); #endif GETC(ch); if (sp == sp1) while(ch == '0') { ++havenum; --exp; GETC(ch); } while(isdigit(ch)) { if (sp < spe) { *sp++ = ch; --exp; } GETC(ch); } } havenum += sp - sp1; se = 0; if (issign(ch)) goto signonly; if (havenum && isexp(ch)) { #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT if (reqint) errfl(f__elist->cierr,115,"invalid integer"); #endif GETC(ch); if (issign(ch)) { signonly: if (ch == '-') se = 1; GETC(ch); } if (!isdigit(ch)) { bad: errfl(f__elist->cierr,112,"exponent field"); } e = ch - '0'; while(isdigit(GETC(ch))) { e = 10*e + ch - '0'; if (e > EXPMAX) goto bad; } if (se) exp -= e; else exp += e; } (void) Ungetc(ch, f__cf); if (sp > sp1) { ++havenum; while(*--sp == '0') ++exp; if (exp) sprintf(sp+1, "e%ld", exp); else sp[1] = 0; f__lx = atof(s); #ifdef Allow_TYQUAD if (reqint&2 && (se = sp - sp1 + exp) > 14 && se < 20) { /* Assuming 64-bit longint and 32-bit long. */ if (exp < 0) sp += exp; if (sp1 <= sp) { f__llx = *sp1 - '0'; while(++sp1 <= sp) f__llx = 10*f__llx + (*sp1 - '0'); } while(--exp >= 0) f__llx *= 10; if (*s == '-') f__llx = -f__llx; } #endif } else f__lx = 0.; if (havenum) f__ltype = TYLONG; else switch(ch) { case ',': case '/': break; default: if (havestar && ( ch == ' ' ||ch == '\t' ||ch == '\n')) break; if (nml_read > 1) { f__lquit = 2; return 0; } errfl(f__elist->cierr,112,"invalid number"); } return 0; } static int rd_count(register int ch) { if (ch < '0' || ch > '9') return 1; f__lcount = ch - '0'; while(GETC(ch) >= '0' && ch <= '9') f__lcount = 10*f__lcount + ch - '0'; Ungetc(ch,f__cf); return f__lcount <= 0; } static int l_C(void) { int ch, nml_save; double lz; if(f__lcount>0) return(0); f__ltype=0; GETC(ch); if(ch!='(') { if (nml_read > 1 && (ch < '0' || ch > '9')) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } if (rd_count(ch)) if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"complex format"); else err(f__elist->cierr,(EOF),"lread"); if(GETC(ch)!='*') { if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"no star"); else err(f__elist->cierr,(EOF),"lread"); } if(GETC(ch)!='(') { Ungetc(ch,f__cf); return(0); } } else f__lcount = 1; while(iswhit(GETC(ch))); Ungetc(ch,f__cf); nml_save = nml_read; nml_read = 0; if (ch = l_R(1,0)) return ch; if (!f__ltype) errfl(f__elist->cierr,112,"no real part"); lz = f__lx; while(iswhit(GETC(ch))); if(ch!=',') { (void) Ungetc(ch,f__cf); errfl(f__elist->cierr,112,"no comma"); } while(iswhit(GETC(ch))); (void) Ungetc(ch,f__cf); if (ch = l_R(1,0)) return ch; if (!f__ltype) errfl(f__elist->cierr,112,"no imaginary part"); while(iswhit(GETC(ch))); if(ch!=')') errfl(f__elist->cierr,112,"no )"); f__ly = f__lx; f__lx = lz; #ifdef Allow_TYQUAD f__llx = 0; #endif nml_read = nml_save; return(0); } static char nmLbuf[256], *nmL_next; static int (*nmL_getc_save)(void); static int (*nmL_ungetc_save)(int, FILE*); static int nmL_getc(void) { int rv; if (rv = *nmL_next++) return rv; l_getc = nmL_getc_save; l_ungetc = nmL_ungetc_save; return (*l_getc)(); } static int nmL_ungetc(int x, FILE *f) { /* f = f;*/ /* banish non-use warning */ ( void )f; return *--nmL_next = x; } static int Lfinish(int ch, int dot, int *rvp) { char *s, *se; static char what[] = "namelist input"; s = nmLbuf + 2; se = nmLbuf + sizeof(nmLbuf) - 1; *s++ = ch; while(!issep(GETC(ch)) && ch!=EOF) { if (s >= se) { nmLbuf_ovfl: return *rvp = err__fl(f__elist->cierr,131,what); } *s++ = ch; if (ch != '=') continue; if (dot) return *rvp = err__fl(f__elist->cierr,112,what); got_eq: *s = 0; nmL_getc_save = l_getc; l_getc = nmL_getc; nmL_ungetc_save = l_ungetc; l_ungetc = nmL_ungetc; nmLbuf[1] = *(nmL_next = nmLbuf) = ','; *rvp = f__lcount = 0; return 1; } if (dot) goto done; for(;;) { if (s >= se) goto nmLbuf_ovfl; *s++ = ch; if (!isblnk(ch)) break; if (GETC(ch) == EOF) goto done; } if (ch == '=') goto got_eq; done: Ungetc(ch, f__cf); return 0; } static int l_L(void) { int ch, rv, sawdot; if(f__lcount>0) return(0); f__lcount = 1; f__ltype=0; GETC(ch); if(isdigit(ch)) { rd_count(ch); if(GETC(ch)!='*') if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"no star"); else err(f__elist->cierr,(EOF),"lread"); GETC(ch); } sawdot = 0; if(ch == '.') { sawdot = 1; GETC(ch); } switch(ch) { case 't': case 'T': if (nml_read && Lfinish(ch, sawdot, &rv)) return rv; f__lx=1; break; case 'f': case 'F': if (nml_read && Lfinish(ch, sawdot, &rv)) return rv; f__lx=0; break; default: if(isblnk(ch) || issep(ch) || ch==EOF) { (void) Ungetc(ch,f__cf); return(0); } if (nml_read > 1) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } errfl(f__elist->cierr,112,"logical"); } f__ltype=TYLONG; while(!issep(GETC(ch)) && ch!=EOF); Ungetc(ch, f__cf); return(0); } #define BUFSIZE 128 static int l_CHAR(void) { int ch,size,i; static char rafail[] = "realloc failure"; char quote,*p; if(f__lcount>0) return(0); f__ltype=0; if(f__lchar!=NULL) free(f__lchar); size=BUFSIZE; p=f__lchar = (char *)malloc((unsigned int)size); if(f__lchar == NULL) errfl(f__elist->cierr,113,"no space"); GETC(ch); if(isdigit(ch)) { /* allow Fortran 8x-style unquoted string... */ /* either find a repetition count or the string */ f__lcount = ch - '0'; *p++ = ch; for(i = 1;;) { switch(GETC(ch)) { case '*': if (f__lcount == 0) { f__lcount = 1; #ifndef F8X_NML_ELIDE_QUOTES if (nml_read) goto no_quote; #endif goto noquote; } p = f__lchar; goto have_lcount; case ',': case ' ': case '\t': case '\n': case '/': Ungetc(ch,f__cf); /* no break */ case EOF: f__lcount = 1; f__ltype = TYCHAR; return *p = 0; } if (!isdigit(ch)) { f__lcount = 1; #ifndef F8X_NML_ELIDE_QUOTES if (nml_read) { no_quote: errfl(f__elist->cierr,112, "undelimited character string"); } #endif goto noquote; } *p++ = ch; f__lcount = 10*f__lcount + ch - '0'; if (++i == size) { f__lchar = (char *)realloc(f__lchar, (unsigned int)(size += BUFSIZE)); if(f__lchar == NULL) errfl(f__elist->cierr,113,rafail); p = f__lchar + i; } } } else (void) Ungetc(ch,f__cf); have_lcount: if(GETC(ch)=='\'' || ch=='"') quote=ch; else if(isblnk(ch) || (issep(ch) && ch != '\n') || ch==EOF) { Ungetc(ch,f__cf); return 0; } #ifndef F8X_NML_ELIDE_QUOTES else if (nml_read > 1) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } #endif else { /* Fortran 8x-style unquoted string */ *p++ = ch; for(i = 1;;) { switch(GETC(ch)) { case ',': case ' ': case '\t': case '\n': case '/': Ungetc(ch,f__cf); /* no break */ case EOF: f__ltype = TYCHAR; return *p = 0; } noquote: *p++ = ch; if (++i == size) { f__lchar = (char *)realloc(f__lchar, (unsigned int)(size += BUFSIZE)); if(f__lchar == NULL) errfl(f__elist->cierr,113,rafail); p = f__lchar + i; } } } f__ltype=TYCHAR; for(i=0;;) { while(GETC(ch)!=quote && ch!='\n' && ch!=EOF && ++icierr,113,rafail); p=f__lchar+i-1; *p++ = ch; } else if(ch==EOF) return(EOF); else if(ch=='\n') { if(*(p-1) != '\\') continue; i--; p--; if(++iciunit]; if(a->ciunit>=MXUNIT || a->ciunit<0) err(a->cierr,101,"stler"); f__scale=f__recpos=0; f__elist=a; if(f__curunit->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,102,"lio"); f__cf=f__curunit->ufd; if(!f__curunit->ufmt) err(a->cierr,103,"lio") return(0); } int l_read(ftnint *number, char *ptr, ftnlen len, ftnint type) { #define Ptr ((flex *)ptr) int i,n,ch; doublereal *yy; real *xx; for(i=0;i<*number;i++) { if(f__lquit) return(0); if(l_eof) err(f__elist->ciend, EOF, "list in") if(f__lcount == 0) { f__ltype = 0; for(;;) { GETC(ch); switch(ch) { case EOF: err(f__elist->ciend,(EOF),"list in") case ' ': case '\t': case '\n': continue; case '/': f__lquit = 1; goto loopend; case ',': f__lcount = 1; goto loopend; default: (void) Ungetc(ch, f__cf); goto rddata; } } } rddata: switch((int)type) { case TYINT1: case TYSHORT: case TYLONG: #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT ERR(l_R(0,1)); break; #endif case TYREAL: case TYDREAL: ERR(l_R(0,0)); break; #ifdef TYQUAD case TYQUAD: n = l_R(0,2); if (n) return n; break; #endif case TYCOMPLEX: case TYDCOMPLEX: ERR(l_C()); break; case TYLOGICAL1: case TYLOGICAL2: case TYLOGICAL: ERR(l_L()); break; case TYCHAR: ERR(l_CHAR()); break; } while (GETC(ch) == ' ' || ch == '\t'); if (ch != ',' || f__lcount > 1) Ungetc(ch,f__cf); loopend: if(f__lquit) return(0); if(f__cf && ferror(f__cf)) { clearerr(f__cf); errfl(f__elist->cierr,errno,"list in"); } if(f__ltype==0) goto bump; switch((int)type) { case TYINT1: case TYLOGICAL1: Ptr->flchar = (char)f__lx; break; case TYLOGICAL2: case TYSHORT: Ptr->flshort = (short)f__lx; break; case TYLOGICAL: case TYLONG: Ptr->flint = (ftnint)f__lx; break; #ifdef Allow_TYQUAD case TYQUAD: if (!(Ptr->fllongint = f__llx)) Ptr->fllongint = f__lx; break; #endif case TYREAL: Ptr->flreal=f__lx; break; case TYDREAL: Ptr->fldouble=f__lx; break; case TYCOMPLEX: xx=(real *)ptr; *xx++ = f__lx; *xx = f__ly; break; case TYDCOMPLEX: yy=(doublereal *)ptr; *yy++ = f__lx; *yy = f__ly; break; case TYCHAR: b_char(f__lchar,ptr,len); break; } bump: if(f__lcount>0) f__lcount--; ptr += len; if (nml_read) nml_read++; } return(0); #undef Ptr } integer s_rsle(cilist *a) { int n; f__reading=1; f__external=1; f__formatted=1; if(n=c_le(a)) return(n); f__lioproc = l_read; f__lquit = 0; f__lcount = 0; l_eof = 0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); if(f__curunit->uend) err(f__elist->ciend,(EOF),"read start"); l_getc = t_getc; l_ungetc = un_getc; f__doend = xrd_SL; return(0); } blis-1.1/blastest/f2c/lwrite.c000066400000000000000000000116141474157777200163010ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" #include "fmt.h" #include "lio.h" #include "arith.h" ftnint L_len; int f__Aquote; static void donewrec(void) { if (f__recpos) (*f__donewrec)(); } static void lwrt_I(longint n) { char *p; int ndigit, sign; p = f__icvt(n, &ndigit, &sign, 10); if(f__recpos + ndigit >= L_len) donewrec(); PUT(' '); if (sign) PUT('-'); while(*p) PUT(*p++); } static void lwrt_L(ftnint n, ftnlen len) { if(f__recpos+LLOGW>=L_len) donewrec(); wrt_L((Uint *)&n,LLOGW, len); } static void lwrt_A(char *p, ftnlen len) { int a; char *p1, *pe; a = 0; pe = p + len; if (f__Aquote) { a = 3; if (len > 1 && p[len-1] == ' ') { while(--len > 1 && p[len-1] == ' '); pe = p + len; } p1 = p; while(p1 < pe) if (*p1++ == '\'') a++; } if(f__recpos+len+a >= L_len) donewrec(); if (a #ifndef OMIT_BLANK_CC || !f__recpos #endif ) PUT(' '); if (a) { PUT('\''); while(p < pe) { if (*p == '\'') PUT('\''); PUT(*p++); } PUT('\''); } else while(p < pe) PUT(*p++); } static int l_g(char *buf, double n) { register char *b, c, c1; b = buf; *b++ = ' '; if (n < 0) { *b++ = '-'; n = -n; } else *b++ = ' '; if (n == 0) { #ifdef SIGNED_ZEROS if (signbit(n)) *b++ = '-'; #endif *b++ = '0'; *b++ = '.'; *b = 0; goto f__ret; } sprintf(b, LGFMT, n); switch(*b) { #ifndef WANT_LEAD_0 case '0': while(b[0] = b[1]) b++; break; #endif case 'i': case 'I': /* Infinity */ case 'n': case 'N': /* NaN */ while(*++b); break; default: /* Fortran 77 insists on having a decimal point... */ for(;; b++) switch(*b) { case 0: *b++ = '.'; *b = 0; goto f__ret; case '.': while(*++b); goto f__ret; case 'E': for(c1 = '.', c = 'E'; *b = c1; c1 = c, c = *++b); goto f__ret; } } f__ret: return b - buf; } static void l_put(register char *s) { #ifdef KR_headers register void (*pn)() = f__putn; #else register void (*pn)(int) = f__putn; #endif register int c; while(c = *s++) (*pn)(c); } static void lwrt_F(double n) { char buf[LEFBL]; if(f__recpos + l_g(buf,n) >= L_len) donewrec(); l_put(buf); } static void lwrt_C(double a, double b) { char *ba, *bb, bufa[LEFBL], bufb[LEFBL]; int al, bl; al = l_g(bufa, a); for(ba = bufa; *ba == ' '; ba++) --al; bl = l_g(bufb, b) + 1; /* intentionally high by 1 */ for(bb = bufb; *bb == ' '; bb++) --bl; if(f__recpos + al + bl + 3 >= L_len) donewrec(); #ifdef OMIT_BLANK_CC else #endif PUT(' '); PUT('('); l_put(ba); PUT(','); if (f__recpos + bl >= L_len) { (*f__donewrec)(); #ifndef OMIT_BLANK_CC PUT(' '); #endif } l_put(bb); PUT(')'); } int l_write(ftnint *number, char *ptr, ftnlen len, ftnint type) { #define Ptr ((flex *)ptr) int i; longint x; double y,z; real *xx; doublereal *yy; for(i=0;i< *number; i++) { switch((int)type) { default: f__fatal(117,"unknown type in lio"); case TYINT1: x = Ptr->flchar; goto xint; case TYSHORT: x=Ptr->flshort; goto xint; #ifdef Allow_TYQUAD case TYQUAD: x = Ptr->fllongint; goto xint; #endif case TYLONG: x=Ptr->flint; xint: lwrt_I(x); break; case TYREAL: y=Ptr->flreal; goto xfloat; case TYDREAL: y=Ptr->fldouble; xfloat: lwrt_F(y); break; case TYCOMPLEX: xx= &Ptr->flreal; y = *xx++; z = *xx; goto xcomplex; case TYDCOMPLEX: yy = &Ptr->fldouble; y= *yy++; z = *yy; xcomplex: lwrt_C(y,z); break; case TYLOGICAL1: x = Ptr->flchar; goto xlog; case TYLOGICAL2: x = Ptr->flshort; goto xlog; case TYLOGICAL: x = Ptr->flint; xlog: lwrt_L(Ptr->flint, len); break; case TYCHAR: lwrt_A(ptr,len); break; } ptr += len; } return(0); } blis-1.1/blastest/f2c/mod.c000066400000000000000000000037241474157777200155550ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_mod(const short *a, const short *b) { return( *a % *b); } integer i_mod(const integer *a, const integer *b) { return( *a % *b); } double r_mod(real *x, real *y) { double quotient; if( (quotient = (double)*x / *y) >= 0) quotient = floor(quotient); else quotient = -floor(-quotient); return(*x - (*y) * quotient ); } double d_mod(const doublereal *x, const doublereal *y) { double quotient; if( (quotient = *x / *y) >= 0) quotient = floor(quotient); else quotient = -floor(-quotient); return(*x - (*y) * quotient ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/nint.c000066400000000000000000000033231474157777200157410ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double d_nint(const doublereal *x) { return( (*x)>=0 ? floor(*x + .5) : -floor(.5 - *x) ); } shortint h_nint(const real *x) { return (shortint)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); } integer i_nint(const real *x) { return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); } double r_nint(real *x) { return( (*x)>=0 ? floor(*x + .5) : -floor(.5 - *x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/open.c000066400000000000000000000141211474157777200157300ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include #ifndef NON_UNIX_STDIO #include #endif #ifdef _MSC_VER #include #define access _access #endif #include "f2c.h" #include "fio.h" const char *f__r_mode[2] = {"rb", "r"}; const char *f__w_mode[4] = {"wb", "w", "r+b", "r+"}; static char f__buf0[400], *f__buf = f__buf0; static int f__buflen = (int)sizeof(f__buf0); static void f__bufadj(int n, int c) { unsigned int len; char *nbuf, *s, *t, *te; if (f__buf == f__buf0) f__buflen = 1024; while(f__buflen <= n) f__buflen <<= 1; len = (unsigned int)f__buflen; if (len != f__buflen || !(nbuf = (char*)malloc(len))) { f__fatal(113, "malloc failure"); } else { s = nbuf; t = f__buf; te = t + c; while (t < te) *s++ = *t++; if (f__buf != f__buf0) free(f__buf); f__buf = nbuf; } } int f__putbuf(int c) { char *s, *se; int n; if (f__hiwater > f__recpos) f__recpos = f__hiwater; n = f__recpos + 1; if (n >= f__buflen) f__bufadj(n, f__recpos); s = f__buf; se = s + f__recpos; if (c) *se++ = c; *se = 0; for(;;) { fputs(s, f__cf); s += strlen(s); if (s >= se) break; /* normally happens the first time */ putc(*s++, f__cf); } return 0; } void x_putc(int c) { if (f__recpos >= f__buflen) f__bufadj(f__recpos, f__buflen); f__buf[f__recpos++] = c; } #define opnerr(f,m,s) {if(f) errno= m; else opn_err(m,s,a); return(m);} static void opn_err(int m, const char *s, olist *a) { if (a->ofnm) { /* supply file name to error message */ if (a->ofnmlen >= f__buflen) f__bufadj((int)a->ofnmlen, 0); g_char(a->ofnm, a->ofnmlen, f__curunit->ufnm = f__buf); } f__fatal(m, s); } integer f_open(olist *a) { unit *b; integer rv; char buf[256], *s; cllist x; int ufmt; FILE *tf; #ifndef NON_UNIX_STDIO int n; #endif f__external = 1; if(a->ounit>=MXUNIT || a->ounit<0) err(a->oerr,101,"open") if (!f__init) f_init(); f__curunit = b = &f__units[a->ounit]; if(b->ufd) { if(a->ofnm==0) { same: if (a->oblnk) b->ublnk = *a->oblnk == 'z' || *a->oblnk == 'Z'; return(0); } #ifdef NON_UNIX_STDIO if (b->ufnm && strlen(b->ufnm) == a->ofnmlen && !strncmp(b->ufnm, a->ofnm, (unsigned)a->ofnmlen)) goto same; #else g_char(a->ofnm,a->ofnmlen,buf); if (f__inode(buf,&n) == b->uinode && n == b->udev) goto same; #endif x.cunit=a->ounit; x.csta=0; x.cerr=a->oerr; if ((rv = f_clos(&x)) != 0) return rv; } b->url = (int)a->orl; b->ublnk = a->oblnk && (*a->oblnk == 'z' || *a->oblnk == 'Z'); if(a->ofm==0) { if(b->url>0) b->ufmt=0; else b->ufmt=1; } else if(*a->ofm=='f' || *a->ofm == 'F') b->ufmt=1; else b->ufmt=0; ufmt = b->ufmt; #ifdef url_Adjust if (b->url && !ufmt) url_Adjust(b->url); #endif if (a->ofnm) { g_char(a->ofnm,a->ofnmlen,buf); if (!buf[0]) opnerr(a->oerr,107,"open") } else sprintf(buf, "fort.%ld", (long)a->ounit); b->uscrtch = 0; b->uend=0; b->uwrt = 0; b->ufd = 0; b->urw = 3; switch(a->osta ? *a->osta : 'u') { case 'o': case 'O': if (access(buf,0)) opnerr(a->oerr,errno,"open") break; case 's': case 'S': b->uscrtch=1; #ifdef HAVE_TMPFILE if (!(b->ufd = tmpfile())) opnerr(a->oerr,errno,"open") b->ufnm = 0; #ifndef NON_UNIX_STDIO b->uinode = b->udev = -1; #endif b->useek = 1; return 0; #else (void) strcpy(buf,"tmp.FXXXXXX"); (void) mktemp(buf); goto replace; #endif case 'n': case 'N': if (!access(buf,0)) opnerr(a->oerr,128,"open") /* no break */ case 'r': /* Fortran 90 replace option */ case 'R': #ifndef HAVE_TMPFILE replace: #endif if (tf = fopen(buf,f__w_mode[0])) fclose(tf); } b->ufnm=(char *) malloc((unsigned int)(strlen(buf)+1)); if(b->ufnm==NULL) opnerr(a->oerr,113,"no space"); (void) strcpy(b->ufnm,buf); if ((s = a->oacc) && b->url) ufmt = 0; if(!(tf = fopen(buf, f__w_mode[ufmt|2]))) { if (tf = fopen(buf, f__r_mode[ufmt])) b->urw = 1; else if (tf = fopen(buf, f__w_mode[ufmt])) { b->uwrt = 1; b->urw = 2; } else err(a->oerr, errno, "open"); } b->useek = f__canseek(b->ufd = tf); #ifndef NON_UNIX_STDIO if((b->uinode = f__inode(buf,&b->udev)) == -1) opnerr(a->oerr,108,"open") #endif if(b->useek) if (a->orl) rewind(b->ufd); else if ((s = a->oacc) && (*s == 'a' || *s == 'A') && FSEEK(b->ufd, 0L, SEEK_END)) opnerr(a->oerr,129,"open"); return(0); } int fk_open(int seq, int fmt, ftnint n) { char nbuf[10]; olist a; // FGVZ: gcc 7.3 outputs a warning that the integer value corresponding // to the "%ld" format specifier could (in theory) use up 11 bytes in a // string that only allows for five additional bytes. I use the modulo // operator to reassure gcc that the integer will be very small. //(void) sprintf(nbuf,"fort.%ld",(long)n); (void) sprintf(nbuf,"fort.%ld",(long)n % 20); a.oerr=1; a.ounit=n; a.ofnm=nbuf; a.ofnmlen=strlen(nbuf); a.osta=NULL; a.oacc= (char*)(seq==SEQ?"s":"d"); a.ofm = (char*)(fmt==FMT?"f":"u"); a.orl = seq==DIR?1:0; a.oblnk=NULL; return(f_open(&a)); } blis-1.1/blastest/f2c/pow.c000066400000000000000000000066141474157777200156040ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* Integer */ shortint pow_hh(shortint *ap, shortint *bp) { return (shortint)(pow(*ap, *bp)); } integer pow_ii(integer *ap, integer *bp) { return (integer)(pow(*ap, *bp)); } #ifdef INTEGER_STAR_8 longint pow_qq(longint *ap, longint *bp) { return (longint)(pow(*ap, *bp)); } #endif /* Double */ double pow_ri(real *ap, integer *bp) { return (pow(*ap, *bp)); } double pow_dd(doublereal *ap, doublereal *bp) { return (pow(*ap, *bp)); } double pow_di(doublereal *ap, integer *bp) { return (pow(*ap, *bp)); } /* Complex */ void pow_ci(complex *p, complex *a, integer *b) { doublecomplex p1, a1; a1.r = a->r; a1.i = a->i; pow_zi(&p1, &a1, b); p->r = p1.r; p->i = p1.i; } void pow_zz(doublecomplex *r, doublecomplex *a, doublecomplex *b) { double logr, logi, x, y; logr = log( hypot(a->r, a->i) ); logi = atan2(a->i, a->r); x = exp( logr * b->r - logi * b->i ); y = logr * b->i + logi * b->r; r->r = x * cos(y); r->i = x * sin(y); } void pow_zi(doublecomplex *p, doublecomplex *a, integer *b) { integer n; unsigned long u; double t; doublecomplex q, x; static doublecomplex one = {1.0, 0.0}; n = *b; q.r = 1; q.i = 0; if(n == 0) goto done; if(n < 0) { n = -n; z_div(&x, &one, a); } else { x.r = a->r; x.i = a->i; } for(u = n; ; ) { if(u & 01) { t = q.r * x.r - q.i * x.i; q.i = q.r * x.i + q.i * x.r; q.r = t; } if(u >>= 1) { t = x.r * x.r - x.i * x.i; x.i = 2 * x.r * x.i; x.r = t; } else break; } done: p->i = q.i; p->r = q.r; } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/prod.c000066400000000000000000000025761474157777200157460ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double d_prod(const real *x, const real *y) { return( (double)(*x) * (double)(*y) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/rdfmt.c000066400000000000000000000220711474157777200161060ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include #include "f2c.h" #include "fio.h" #include "fmt.h" #include "fp.h" static int rd_Z(Uint *n, int w, ftnlen len) { long x[9]; char *s, *s0, *s1, *se, *t; const char *sc; int ch, i, w1, w2; static char hex[256]; static int one = 1; int bad = 0; if (!hex['0']) { sc = "0123456789"; while(ch = *sc++) hex[ch] = ch - '0' + 1; sc = "ABCDEF"; while(ch = *sc++) hex[ch] = hex[ch + 'a' - 'A'] = ch - 'A' + 11; } s = s0 = (char *)x; s1 = (char *)&x[4]; se = (char *)&x[8]; if (len > 4*sizeof(long)) return errno = 117; while (w) { GET(ch); if (ch==',' || ch=='\n') break; w--; if (ch > ' ') { if (!hex[ch & 0xff]) bad++; *s++ = ch; if (s == se) { /* discard excess characters */ for(t = s0, s = s1; t < s1;) *t++ = *s++; s = s1; } } } if (bad) return errno = 115; w = (int)len; w1 = s - s0; w2 = w1+1 >> 1; t = (char *)n; if (*(char *)&one) { /* little endian */ t += w - 1; i = -1; } else i = 1; for(; w > w2; t += i, --w) *t = 0; if (!w) return 0; if (w < w2) s0 = s - (w << 1); else if (w1 & 1) { *t = hex[*s0++ & 0xff] - 1; if (!--w) return 0; t += i; } do { *t = hex[*s0 & 0xff]-1 << 4 | hex[s0[1] & 0xff]-1; t += i; s0 += 2; } while(--w); return 0; } static int rd_I(Uint *n, int w, ftnlen len, register int base) { int ch, sign; longint x = 0; if (w <= 0) goto have_x; for(;;) { GET(ch); if (ch != ' ') break; if (!--w) goto have_x; } sign = 0; switch(ch) { case ',': case '\n': w = 0; goto have_x; case '-': sign = 1; case '+': break; default: if (ch >= '0' && ch <= '9') { x = ch - '0'; break; } goto have_x; } while(--w) { GET(ch); if (ch >= '0' && ch <= '9') { x = x*base + ch - '0'; continue; } if (ch != ' ') { if (ch == '\n' || ch == ',') w = 0; break; } if (f__cblank) x *= base; } if (sign) x = -x; have_x: if(len == sizeof(integer)) n->il=x; else if(len == sizeof(char)) n->ic = (char)x; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) n->ili = x; #endif else n->is = (short)x; if (w) { while(--w) GET(ch); return errno = 115; } return 0; } static int rd_L(ftnint *n, int w, ftnlen len) { int ch, dot, lv; if (w <= 0) goto bad; for(;;) { GET(ch); --w; if (ch != ' ') break; if (!w) goto bad; } dot = 0; retry: switch(ch) { case '.': if (dot++ || !w) goto bad; GET(ch); --w; goto retry; case 't': case 'T': lv = 1; break; case 'f': case 'F': lv = 0; break; default: bad: for(; w > 0; --w) GET(ch); /* no break */ case ',': case '\n': return errno = 116; } switch(len) { case sizeof(char): *(char *)n = (char)lv; break; case sizeof(short): *(short *)n = (short)lv; break; default: *n = lv; } while(w-- > 0) { GET(ch); if (ch == ',' || ch == '\n') break; } return 0; } static int rd_F(ufloat *p, int w, int d, ftnlen len) { char s[FMAX+EXPMAXDIGS+4]; register int ch; register char *sp, *spe, *sp1; double x; int scale1, se; long e, exp; sp1 = sp = s; spe = sp + FMAX; exp = -d; x = 0.; do { GET(ch); w--; } while (ch == ' ' && w); switch(ch) { case '-': *sp++ = ch; sp1++; spe++; case '+': if (!w) goto zero; --w; GET(ch); } while(ch == ' ') { blankdrop: if (!w--) goto zero; GET(ch); } while(ch == '0') { if (!w--) goto zero; GET(ch); } if (ch == ' ' && f__cblank) goto blankdrop; scale1 = f__scale; while(isdigit(ch)) { digloop1: if (sp < spe) *sp++ = ch; else ++exp; digloop1e: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) { ch = '0'; goto digloop1; } goto digloop1e; } if (ch == '.') { exp += d; if (!w--) goto done; GET(ch); if (sp == sp1) { /* no digits yet */ while(ch == '0') { skip01: --exp; skip0: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) goto skip01; goto skip0; } } while(isdigit(ch)) { digloop2: if (sp < spe) { *sp++ = ch; --exp; } digloop2e: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) { ch = '0'; goto digloop2; } goto digloop2e; } } switch(ch) { default: break; case '-': se = 1; goto signonly; case '+': se = 0; goto signonly; case 'e': case 'E': case 'd': case 'D': if (!w--) goto bad; GET(ch); while(ch == ' ') { if (!w--) goto bad; GET(ch); } se = 0; switch(ch) { case '-': se = 1; case '+': signonly: if (!w--) goto bad; GET(ch); } while(ch == ' ') { if (!w--) goto bad; GET(ch); } if (!isdigit(ch)) goto bad; e = ch - '0'; for(;;) { if (!w--) { ch = '\n'; break; } GET(ch); if (!isdigit(ch)) { if (ch == ' ') { if (f__cblank) ch = '0'; else continue; } else break; } e = 10*e + ch - '0'; if (e > EXPMAX && sp > sp1) goto bad; } if (se) exp -= e; else exp += e; scale1 = 0; } switch(ch) { case '\n': case ',': break; default: bad: return (errno = 115); } done: if (sp > sp1) { while(*--sp == '0') ++exp; if (exp -= scale1) sprintf(sp+1, "e%ld", exp); else sp[1] = 0; x = atof(s); } zero: if (len == sizeof(real)) p->pf = x; else p->pd = x; return(0); } static int rd_A(char *p, ftnlen len) { int i,ch; for(i=0;i=len) { for(i=0;i0;f__cursor--) if((ch=(*f__getn)())<0) return(ch); if(f__cursor<0) { if(f__recpos+f__cursor < 0) /*err(elist->cierr,110,"fmt")*/ f__cursor = -f__recpos; /* is this in the standard? */ if(f__external == 0) { extern char *f__icptr; f__icptr += f__cursor; } else if(f__curunit && f__curunit->useek) (void) FSEEK(f__cf, f__cursor,SEEK_CUR); else err(f__elist->cierr,106,"fmt"); f__recpos += f__cursor; f__cursor=0; } switch(p->op) { default: fprintf(stderr,"rd_ed, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case IM: case I: ch = rd_I((Uint *)ptr,p->p1,len, 10); break; /* O and OM don't work right for character, double, complex, */ /* or doublecomplex, and they differ from Fortran 90 in */ /* showing a minus sign for negative values. */ case OM: case O: ch = rd_I((Uint *)ptr, p->p1, len, 8); break; case L: ch = rd_L((ftnint *)ptr,p->p1,len); break; case A: ch = rd_A(ptr,len); break; case AW: ch = rd_AW(ptr,p->p1,len); break; case E: case EE: case D: case G: case GE: case F: ch = rd_F((ufloat *)ptr,p->p1,p->p2.i[0],len); break; /* Z and ZM assume 8-bit bytes. */ case ZM: case Z: ch = rd_Z((Uint *)ptr, p->p1, len); break; } if(ch == 0) return(ch); else if(ch == EOF) return(EOF); if (f__cf) clearerr(f__cf); return(errno); } int rd_ned(struct syl *p) { switch(p->op) { default: fprintf(stderr,"rd_ned, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case APOS: return(rd_POS(p->p2.s)); case H: return(rd_H(p->p1,p->p2.s)); case SLASH: return((*f__donewrec)()); case TR: case X: f__cursor += p->p1; return(1); case T: f__cursor=p->p1-f__recpos - 1; return(1); case TL: f__cursor -= p->p1; if(f__cursor < -f__recpos) /* TL1000, 1X */ f__cursor = -f__recpos; return(1); } } blis-1.1/blastest/f2c/rewind.c000066400000000000000000000030601474157777200162570ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ //#include #include "f2c.h" #include "fio.h" integer f_rew(alist *a) { unit *b; if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"rewind"); b = &f__units[a->aunit]; if(b->ufd == NULL || b->uwrt == 3) return(0); if(!b->useek) err(a->aerr,106,"rewind") if(b->uwrt) { (void) t_runc(a); b->uwrt = 3; } rewind(b->ufd); b->uend=0; return 0; } blis-1.1/blastest/f2c/rsfe.c000066400000000000000000000050211474157777200157250ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* read sequential formatted external */ #include #include "f2c.h" #include "fio.h" #include "fmt.h" int xrd_SL(void) { int ch; if(!f__curunit->uend) while((ch=getc(f__cf))!='\n') if (ch == EOF) { f__curunit->uend = 1; break; } f__cursor=f__recpos=0; return 1; } int x_getc(void) { int ch; if(f__curunit->uend) return EOF; ch = getc(f__cf); if(ch!=EOF && ch!='\n') { f__recpos++; return ch; } if(ch=='\n') { (void) ungetc(ch,f__cf); return ch; } if(f__curunit->uend || feof(f__cf)) { errno=0; f__curunit->uend=1; return -1; } return -1; } int x_endp(void) { xrd_SL(); return f__curunit->uend == 1 ? EOF : 0; } int x_rev(void) { (void) xrd_SL(); return 0; } integer s_rsfe(cilist *a) /* start */ { int n; if(!f__init) f_init(); f__reading=1; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return n; f__elist=a; f__cursor=f__recpos=0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__getn= x_getc; f__doed= rd_ed; f__doned= rd_ned; fmt_bg(); f__doend=x_endp; f__donewrec=xrd_SL; f__dorevert=x_rev; f__cblank=f__curunit->ublnk; f__cplus=0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); if(f__curunit->uend) err(f__elist->ciend,(EOF),"read start"); return 0; } blis-1.1/blastest/f2c/s_cmp.c000066400000000000000000000044241474157777200160750ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* compare two strings */ integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb) { register unsigned char *a, *aend, *b, *bend; a = (unsigned char *)a0; b = (unsigned char *)b0; aend = a + la; bend = b + lb; if(la <= lb) { while(a < aend) if(*a != *b) return( *a - *b ); else { ++a; ++b; } while(b < bend) if(*b != ' ') return( ' ' - *b ); else ++b; } else { while(b < bend) if(*a == *b) { ++a; ++b; } else return( *a - *b ); while(a < aend) if(*a != ' ') return(*a - ' '); else ++a; } return(0); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/s_copy.c000066400000000000000000000034021474157777200162630ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the * target of an assignment to appear on its right-hand side (contrary * to the Fortran 77 Standard, but in accordance with Fortran 90), * as in a(2:5) = a(4:7) . */ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* assign strings: a = b */ int s_copy(char *a, const char *b, ftnlen la, ftnlen lb) { if (la <= lb) { memmove(a, b, la); } else { memset((char *)memmove(a, b, lb) + lb, ' ', la - lb); } return 0; } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/s_stop.c000066400000000000000000000032511474157777200163000ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ //#include #include #include #include "f2c.h" int s_stop(char *s, ftnlen n) { int i; if(n > 0) { fprintf(stderr, "STOP "); for(i = 0; i #include "f2c.h" #include "fio.h" integer e_rsfe(void) { int n; n=en_fio(); f__fmtbuf=NULL; return(n); } int c_sfe(cilist *a) { unit *p; f__curunit = p = &f__units[a->ciunit]; if(a->ciunit >= MXUNIT || a->ciunit<0) err(a->cierr,101,"startio"); if(p->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,114,"sfe") if(!p->ufmt) err(a->cierr,102,"sfe") return(0); } integer e_wsfe(void) { int n = en_fio(); f__fmtbuf = NULL; #ifdef ALWAYS_FLUSH if (!n && fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif return n; } blis-1.1/blastest/f2c/sig_die.c000066400000000000000000000032231474157777200163730ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include #include #include "f2c.h" #ifndef SIGIOT #ifdef SIGABRT #define SIGIOT SIGABRT #endif #endif void sig_die(const char *s, int kill) { /* print error message, then clear buffers */ fprintf(stderr, "%s\n", s); if(kill) { fflush(stderr); f_exit(); fflush(stderr); /* now get a core */ #ifdef SIGIOT signal(SIGIOT, SIG_DFL); #endif abort(); } else { f_exit(); exit(1); } } blis-1.1/blastest/f2c/sign.c000066400000000000000000000035171474157777200157360ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_sign(const shortint *a, const shortint *b) { shortint x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } integer i_sign(const integer *a, const integer *b) { integer x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } double r_sign(real *a, real *b) { double x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } double d_sign(const doublereal *a, const doublereal *b) { double x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/sin.c000066400000000000000000000033511474157777200155630ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_sin(real *x) { return( sin(*x) ); } double d_sin(const doublereal *x) { return( sin(*x) ); } void c_sin(complex *r, complex *z) { double zi = z->i, zr = z->r; r->r = sin(zr) * cosh(zi); r->i = cos(zr) * sinh(zi); } void z_sin(doublecomplex *r, doublecomplex *z) { double zi = z->i, zr = z->r; r->r = sin(zr) * cosh(zi); r->i = cos(zr) * sinh(zi); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/sinh.c000066400000000000000000000026451474157777200157400ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_sinh(real *x) { return( sinh(*x) ); } double d_sinh(const doublereal *x) { return( sinh(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/sqrt.c000066400000000000000000000030331474157777200157600ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_sqrt(real *x) { return ( sqrt(*x) ); } double d_sqrt(const doublereal *x) { return ( sqrt(*x) ); } void c_sqrt(complex *r, complex *z) { } void z_sqrt(doublecomplex *r, doublecomplex *z) { } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/tan.c000066400000000000000000000026411474157777200155550ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_tan(real *x) { return( tan(*x) ); } double d_tan(const doublereal *x) { return( tan(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/tanh.c000066400000000000000000000026451474157777200157310ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_tanh(real *x) { return( tanh(*x) ); } double d_tanh(const doublereal *x) { return( tanh(*x) ); } #ifdef __cplusplus } #endif blis-1.1/blastest/f2c/util.c000066400000000000000000000033751474157777200157550ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" void g_char(const char *a, ftnlen alen, char *b) { const char *x = a + alen; char *y = b + alen; for(;; y--) { if (x <= a) { *b = 0; return; } if (*--x != ' ') break; } *y-- = 0; do *y-- = *x; while(x-- > a); } void b_char(const char *a, char *b, ftnlen blen) { int i; for(i=0;i #include #include #include #include "f2c.h" #include "fio.h" #include "arith.h" #include "fmt.h" #include "fp.h" int wrt_E(ufloat *p, int w, int d, int e, ftnlen len) { char buf[FMAX+EXPMAXDIGS+4], *s, *se; int d1, delta, e1, i, sign, signspace; double dd; #ifdef WANT_LEAD_0 int insert0 = 0; #endif int e0 = e; if(e <= 0) e = 2; if(f__scale) { if(f__scale >= d + 2 || f__scale <= -d) goto nogood; } if(f__scale <= 0) --d; if (len == sizeof(real)) dd = p->pf; else dd = p->pd; if (dd < 0.) { signspace = sign = 1; dd = -dd; } else { sign = 0; signspace = (int)f__cplus; if (!dd) { #ifdef SIGNED_ZEROS if (signbit(dd)) signspace = sign = 1; #endif dd = 0.; /* avoid -0 */ } } delta = w - (2 /* for the . and the d adjustment above */ + 2 /* for the E+ */ + signspace + d + e); #ifdef WANT_LEAD_0 if (f__scale <= 0 && delta > 0) { delta--; insert0 = 1; } else #endif if (delta < 0) { nogood: while(--w >= 0) PUT('*'); return(0); } if (f__scale < 0) d += f__scale; if (d > FMAX) { d1 = d - FMAX; d = FMAX; } else d1 = 0; sprintf(buf,"%#.*E", d, dd); /* check for NaN, Infinity */ if (!isdigit(buf[0])) { switch(buf[0]) { case 'n': case 'N': signspace = 0; /* no sign for NaNs */ } delta = w - strlen(buf) - signspace; if (delta < 0) goto nogood; while(--delta >= 0) PUT(' '); if (signspace) PUT(sign ? '-' : '+'); for(s = buf; *s; s++) PUT(*s); return 0; } se = buf + d + 3; #ifdef GOOD_SPRINTF_EXPONENT /* When possible, exponent has 2 digits. */ if (f__scale != 1 && dd) sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); #else if (dd) sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); else strcpy(se, "+00"); #endif s = ++se; if (e < 2) { if (*s != '0') goto nogood; } /* accommodate 3 significant digits in exponent */ if (s[2]) { #ifdef Pedantic if (!e0 && !s[3]) for(s -= 2, e1 = 2; s[0] = s[1]; s++); /* Pedantic gives the behavior that Fortran 77 specifies, */ /* i.e., requires that E be specified for exponent fields */ /* of more than 3 digits. With Pedantic undefined, we get */ /* the behavior that Cray displays -- you get a bigger */ /* exponent field if it fits. */ #else if (!e0) { for(s -= 2, e1 = 2; s[0] = s[1]; s++) #ifdef CRAY delta--; if ((delta += 4) < 0) goto nogood #endif ; } #endif else if (e0 >= 0) goto shift; else e1 = e; } else shift: for(s += 2, e1 = 2; *s; ++e1, ++s) if (e1 >= e) goto nogood; while(--delta >= 0) PUT(' '); if (signspace) PUT(sign ? '-' : '+'); s = buf; i = f__scale; if (f__scale <= 0) { #ifdef WANT_LEAD_0 if (insert0) PUT('0'); #endif PUT('.'); for(; i < 0; ++i) PUT('0'); PUT(*s); s += 2; } else if (f__scale > 1) { PUT(*s); s += 2; while(--i > 0) PUT(*s++); PUT('.'); } if (d1) { se -= 2; while(s < se) PUT(*s++); se += 2; do PUT('0'); while(--d1 > 0); } while(s < se) PUT(*s++); if (e < 2) PUT(s[1]); else { while(++e1 <= e) PUT('0'); while(*s) PUT(*s++); } return 0; } int wrt_F(ufloat *p, int w, int d, ftnlen len) { int d1, sign, n; double x; char *b, buf[MAXINTDIGS+MAXFRACDIGS+4], *s; x= (len==sizeof(real)?p->pf:p->pd); if (d < MAXFRACDIGS) d1 = 0; else { d1 = d - MAXFRACDIGS; d = MAXFRACDIGS; } if (x < 0.) { x = -x; sign = 1; } else { sign = 0; if (!x) { #ifdef SIGNED_ZEROS if (signbit(x)) sign = 2; #endif x = 0.; } } if (n = f__scale) if (n > 0) do x *= 10.; while(--n > 0); else do x *= 0.1; while(++n < 0); n = sprintf(b = buf, "%#.*f", d, x) + d1; #ifndef WANT_LEAD_0 if (buf[0] == '0' && d) { ++b; --n; } #endif if (sign == 1) { /* check for all zeros */ for(s = b;;) { while(*s == '0') s++; switch(*s) { case '.': s++; continue; case 0: sign = 0; } break; } } if (sign || f__cplus) ++n; if (n > w) { #ifdef WANT_LEAD_0 if (buf[0] == '0' && --n == w) ++b; else #endif { while(--w >= 0) PUT('*'); return 0; } } for(w -= n; --w >= 0; ) PUT(' '); if (sign) PUT('-'); else if (f__cplus) PUT('+'); while(n = *b++) PUT(n); while(--d1 >= 0) PUT('0'); return 0; } blis-1.1/blastest/f2c/wrtfmt.c000066400000000000000000000172341474157777200163220ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" #include "fmt.h" static int mv_cur(void) /* shouldn't use fseek because it insists on calling fflush */ /* instead we know too much about stdio */ { int cursor = f__cursor; f__cursor = 0; if(f__external == 0) { if(cursor < 0) { if(f__hiwater < f__recpos) f__hiwater = f__recpos; f__recpos += cursor; f__icptr += cursor; if(f__recpos < 0) err(f__elist->cierr, 110, "left off"); } else if(cursor > 0) { if(f__recpos + cursor >= f__svic->icirlen) err(f__elist->cierr, 110, "recend"); if(f__hiwater <= f__recpos) for(; cursor > 0; cursor--) (*f__putn)(' '); else if(f__hiwater <= f__recpos + cursor) { cursor -= f__hiwater - f__recpos; f__icptr += f__hiwater - f__recpos; f__recpos = f__hiwater; for(; cursor > 0; cursor--) (*f__putn)(' '); } else { f__icptr += cursor; f__recpos += cursor; } } return(0); } if (cursor > 0) { if(f__hiwater <= f__recpos) for(;cursor>0;cursor--) (*f__putn)(' '); else if(f__hiwater <= f__recpos + cursor) { cursor -= f__hiwater - f__recpos; f__recpos = f__hiwater; for(; cursor > 0; cursor--) (*f__putn)(' '); } else { f__recpos += cursor; } } else if (cursor < 0) { if(cursor + f__recpos < 0) err(f__elist->cierr,110,"left off"); if(f__hiwater < f__recpos) f__hiwater = f__recpos; f__recpos += cursor; } return(0); } static int wrt_Z(Uint *n, int w, int minlen, ftnlen len) { register char *s, *se; register int i, w1; static int one = 1; static char hex[] = "0123456789ABCDEF"; s = (char *)n; --len; if (*(char *)&one) { /* little endian */ se = s; s += len; i = -1; } else { se = s + len; i = 1; } for(;; s += i) if (s == se || *s) break; w1 = (i*(se-s) << 1) + 1; if (*s & 0xf0) w1++; if (w1 > w) for(i = 0; i < w; i++) (*f__putn)('*'); else { if ((minlen -= w1) > 0) w1 += minlen; while(--w >= w1) (*f__putn)(' '); while(--minlen >= 0) (*f__putn)('0'); if (!(*s & 0xf0)) { (*f__putn)(hex[*s & 0xf]); if (s == se) return 0; s += i; } for(;; s += i) { (*f__putn)(hex[*s >> 4 & 0xf]); (*f__putn)(hex[*s & 0xf]); if (s == se) break; } } return 0; } static int wrt_I(Uint *n, int w, ftnlen len, register int base) { int ndigit,sign,spare,i; longint x; char *ans; if(len==sizeof(integer)) x=n->il; else if(len == sizeof(char)) x = n->ic; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) x = n->ili; #endif else x=n->is; ans=f__icvt(x,&ndigit,&sign, base); spare=w-ndigit; if(sign || f__cplus) spare--; if(spare<0) for(i=0;iil; else if(len == sizeof(char)) x = n->ic; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) x = n->ili; #endif else x=n->is; ans=f__icvt(x,&ndigit,&sign, base); if(sign || f__cplus) xsign=1; else xsign=0; if(ndigit+xsign>w || m+xsign>w) { for(i=0;i=m) spare=w-ndigit-xsign; else spare=w-m-xsign; for(i=0;iil; else if(sz == sizeof(char)) x = n->ic; else x=n->is; for(i=0;i 0) (*f__putn)(*p++); return(0); } static int wrt_AW(char * p, int w, ftnlen len) { while(w>len) { w--; (*f__putn)(' '); } while(w-- > 0) (*f__putn)(*p++); return(0); } static int wrt_G(ufloat *p, int w, int d, int e, ftnlen len) { double up = 1,x; int i=0,oldscale,n,j; x = len==sizeof(real)?p->pf:p->pd; if(x < 0 ) x = -x; if(x<.1) { if (x != 0.) return(wrt_E(p,w,d,e,len)); i = 1; goto have_i; } for(;i<=d;i++,up*=10) { if(x>=up) continue; have_i: oldscale = f__scale; f__scale = 0; if(e==0) n=4; else n=e+2; i=wrt_F(p,w-n,d-i,len); for(j=0;jop) { default: fprintf(stderr,"w_ed, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case I: return(wrt_I((Uint *)ptr,p->p1,len, 10)); case IM: return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,10)); /* O and OM don't work right for character, double, complex, */ /* or doublecomplex, and they differ from Fortran 90 in */ /* showing a minus sign for negative values. */ case O: return(wrt_I((Uint *)ptr, p->p1, len, 8)); case OM: return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,8)); case L: return(wrt_L((Uint *)ptr,p->p1, len)); case A: return(wrt_A(ptr,len)); case AW: return(wrt_AW(ptr,p->p1,len)); case D: case E: case EE: return(wrt_E((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); case G: case GE: return(wrt_G((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); case F: return(wrt_F((ufloat *)ptr,p->p1,p->p2.i[0],len)); /* Z and ZM assume 8-bit bytes. */ case Z: return(wrt_Z((Uint *)ptr,p->p1,0,len)); case ZM: return(wrt_Z((Uint *)ptr,p->p1,p->p2.i[0],len)); } } int w_ned(struct syl *p) { switch(p->op) { default: fprintf(stderr,"w_ned, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case SLASH: return((*f__donewrec)()); case T: f__cursor = p->p1-f__recpos - 1; return(1); case TL: f__cursor -= p->p1; if(f__cursor < -f__recpos) /* TL1000, 1X */ f__cursor = -f__recpos; return(1); case TR: case X: f__cursor += p->p1; return(1); case APOS: return(wrt_AP(p->p2.s)); case H: return(wrt_H(p->p1,p->p2.s)); } } blis-1.1/blastest/f2c/wsfe.c000066400000000000000000000045101474157777200157340ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /*write sequential formatted external*/ #include #include "f2c.h" #include "fio.h" #include "fmt.h" int x_wSL(void) { int n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return(n == 0); } static int xw_end(void) { int n; if(f__nonl) { f__putbuf(n = 0); fflush(f__cf); } else n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return n; } static int xw_rev(void) { int n = 0; if(f__workdone) { n = f__putbuf('\n'); f__workdone = 0; } f__hiwater = f__recpos = f__cursor = 0; return n; } integer s_wsfe(cilist *a) /*start*/ { int n; if(!f__init) f_init(); f__reading=0; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return(n); f__elist=a; f__hiwater = f__cursor=f__recpos=0; f__nonl = 0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__putn= x_putc; f__doed= w_ed; f__doned= w_ned; f__doend=xw_end; f__dorevert=xw_rev; f__donewrec=x_wSL; fmt_bg(); f__cplus=0; f__cblank=f__curunit->ublnk; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr,errno,"write start"); return(0); } blis-1.1/blastest/f2c/wsle.c000066400000000000000000000034141474157777200157440ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include "f2c.h" #include "fio.h" #include "fmt.h" #include "lio.h" integer s_wsle(cilist *a) { int n; if(n=c_le(a)) return(n); f__reading=0; f__external=1; f__formatted=1; f__putn = x_putc; f__lioproc = l_write; L_len = LINE; f__donewrec = x_wSL; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr, errno, "list output start"); return(0); } integer e_wsle(void) { int n = f__putbuf('\n'); f__recpos=0; #ifdef ALWAYS_FLUSH if (!n && fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif return(n); } blis-1.1/blastest/input/000077500000000000000000000000001474157777200153115ustar00rootroot00000000000000blis-1.1/blastest/input/cblat2.in000066400000000000000000000030121474157777200170040ustar00rootroot00000000000000'out.cblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA CGEMV T PUT F FOR NO TEST. SAME COLUMNS. CGBMV T PUT F FOR NO TEST. SAME COLUMNS. CHEMV T PUT F FOR NO TEST. SAME COLUMNS. CHBMV T PUT F FOR NO TEST. SAME COLUMNS. CHPMV T PUT F FOR NO TEST. SAME COLUMNS. CTRMV T PUT F FOR NO TEST. SAME COLUMNS. CTBMV T PUT F FOR NO TEST. SAME COLUMNS. CTPMV T PUT F FOR NO TEST. SAME COLUMNS. CTRSV T PUT F FOR NO TEST. SAME COLUMNS. CTBSV T PUT F FOR NO TEST. SAME COLUMNS. CTPSV T PUT F FOR NO TEST. SAME COLUMNS. CGERC T PUT F FOR NO TEST. SAME COLUMNS. CGERU T PUT F FOR NO TEST. SAME COLUMNS. CHER T PUT F FOR NO TEST. SAME COLUMNS. CHPR T PUT F FOR NO TEST. SAME COLUMNS. CHER2 T PUT F FOR NO TEST. SAME COLUMNS. CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/cblat3.in000066400000000000000000000020261474157777200170110ustar00rootroot00000000000000'out.cblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA CGEMM T PUT F FOR NO TEST. SAME COLUMNS. CHEMM T PUT F FOR NO TEST. SAME COLUMNS. CSYMM T PUT F FOR NO TEST. SAME COLUMNS. CTRMM T PUT F FOR NO TEST. SAME COLUMNS. CTRSM T PUT F FOR NO TEST. SAME COLUMNS. CHERK T PUT F FOR NO TEST. SAME COLUMNS. CSYRK T PUT F FOR NO TEST. SAME COLUMNS. CHER2K T PUT F FOR NO TEST. SAME COLUMNS. CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/dblat2.in000066400000000000000000000026721474157777200170200ustar00rootroot00000000000000'out.dblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 0.9 VALUES OF BETA DGEMV T PUT F FOR NO TEST. SAME COLUMNS. DGBMV T PUT F FOR NO TEST. SAME COLUMNS. DSYMV T PUT F FOR NO TEST. SAME COLUMNS. DSBMV T PUT F FOR NO TEST. SAME COLUMNS. DSPMV T PUT F FOR NO TEST. SAME COLUMNS. DTRMV T PUT F FOR NO TEST. SAME COLUMNS. DTBMV T PUT F FOR NO TEST. SAME COLUMNS. DTPMV T PUT F FOR NO TEST. SAME COLUMNS. DTRSV T PUT F FOR NO TEST. SAME COLUMNS. DTBSV T PUT F FOR NO TEST. SAME COLUMNS. DTPSV T PUT F FOR NO TEST. SAME COLUMNS. DGER T PUT F FOR NO TEST. SAME COLUMNS. DSYR T PUT F FOR NO TEST. SAME COLUMNS. DSPR T PUT F FOR NO TEST. SAME COLUMNS. DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/dblat3.in000066400000000000000000000015621474157777200170160ustar00rootroot00000000000000'out.dblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 1.3 VALUES OF BETA DGEMM T PUT F FOR NO TEST. SAME COLUMNS. DSYMM T PUT F FOR NO TEST. SAME COLUMNS. DTRMM T PUT F FOR NO TEST. SAME COLUMNS. DTRSM T PUT F FOR NO TEST. SAME COLUMNS. DSYRK T PUT F FOR NO TEST. SAME COLUMNS. DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/sblat2.in000066400000000000000000000026721474157777200170370ustar00rootroot00000000000000'out.sblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 0.9 VALUES OF BETA SGEMV T PUT F FOR NO TEST. SAME COLUMNS. SGBMV T PUT F FOR NO TEST. SAME COLUMNS. SSYMV T PUT F FOR NO TEST. SAME COLUMNS. SSBMV T PUT F FOR NO TEST. SAME COLUMNS. SSPMV T PUT F FOR NO TEST. SAME COLUMNS. STRMV T PUT F FOR NO TEST. SAME COLUMNS. STBMV T PUT F FOR NO TEST. SAME COLUMNS. STPMV T PUT F FOR NO TEST. SAME COLUMNS. STRSV T PUT F FOR NO TEST. SAME COLUMNS. STBSV T PUT F FOR NO TEST. SAME COLUMNS. STPSV T PUT F FOR NO TEST. SAME COLUMNS. SGER T PUT F FOR NO TEST. SAME COLUMNS. SSYR T PUT F FOR NO TEST. SAME COLUMNS. SSPR T PUT F FOR NO TEST. SAME COLUMNS. SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/sblat3.in000066400000000000000000000015621474157777200170350ustar00rootroot00000000000000'out.sblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 1.3 VALUES OF BETA SGEMM T PUT F FOR NO TEST. SAME COLUMNS. SSYMM T PUT F FOR NO TEST. SAME COLUMNS. STRMM T PUT F FOR NO TEST. SAME COLUMNS. STRSM T PUT F FOR NO TEST. SAME COLUMNS. SSYRK T PUT F FOR NO TEST. SAME COLUMNS. SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/zblat2.in000066400000000000000000000030121474157777200170330ustar00rootroot00000000000000'out.zblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. ZGERC T PUT F FOR NO TEST. SAME COLUMNS. ZGERU T PUT F FOR NO TEST. SAME COLUMNS. ZHER T PUT F FOR NO TEST. SAME COLUMNS. ZHPR T PUT F FOR NO TEST. SAME COLUMNS. ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/input/zblat3.in000066400000000000000000000020261474157777200170400ustar00rootroot00000000000000'out.zblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. ZHERK T PUT F FOR NO TEST. SAME COLUMNS. ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-1.1/blastest/obj/000077500000000000000000000000001474157777200147245ustar00rootroot00000000000000blis-1.1/blastest/obj/.gitkeep000066400000000000000000000000001474157777200163430ustar00rootroot00000000000000blis-1.1/blastest/src/000077500000000000000000000000001474157777200147415ustar00rootroot00000000000000blis-1.1/blastest/src/cblat1.c000066400000000000000000000716151474157777200162650ustar00rootroot00000000000000/* cblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy, mode; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static integer c__5 = 5; static real c_b43 = 1.f; static real c_b52 = 0.f; /* > \brief \b CBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM CBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX Level 1 BLAS. */ /* > Based upon the original BLAS test routine together with: */ /* > */ /* > F06GAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "cblat1"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static real sfac = 9.765625e-4f; /* Format strings */ static char fmt_99999[] = "(\002 Complex BLAS Test Program Results\002,/" "1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check1_(real *), check2_(real *), header_( void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 10; ++ic) { combla_1.icase = ic; header_(); /* Initialize PASS, INCX, INCY, and MODE for a new case. */ /* The value 9999 for INCX, INCY or MODE will appear in the */ /* detailed output, if any, for cases that do not involve */ /* these parameters. */ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; combla_1.mode = 9999; if (combla_1.icase <= 5) { check2_(&sfac); } else if (combla_1.icase >= 6) { check1_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*10] = "CDOTC " "CDOTU " "CAXPY " "CCOPY " "CSWAP " "SCNR" "M2" "SCASUM" "CSCAL " "CSSCAL" "ICAMAX"; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check1_(real *sfac) { /* Initialized data */ static real strue2[5] = { 0.f,.5f,.6f,.7f,.8f }; static real strue4[5] = { 0.f,.7f,1.f,1.3f,1.6f }; static complex ctrue5[80] /* was [8][5][2] */ = { {.1f,.1f},{1.f,2.f},{ 1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{-.16f, -.37f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f} ,{3.f,4.f},{-.17f,-.19f},{.13f,-.39f},{5.f,6.f},{5.f,6.f},{5.f, 6.f},{5.f,6.f},{5.f,6.f},{5.f,6.f},{.11f,-.03f},{-.17f,.46f},{ -.17f,-.19f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{ .19f,-.17f},{.2f,-.35f},{.35f,.2f},{.14f,.08f},{2.f,3.f},{2.f,3.f} ,{2.f,3.f},{2.f,3.f},{.1f,.1f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f, 5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{-.16f,-.37f},{6.f,7.f},{6.f, 7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{-.17f, -.19f},{8.f,9.f},{.13f,-.39f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{2.f, 5.f},{2.f,5.f},{.11f,-.03f},{3.f,6.f},{-.17f,.46f},{4.f,7.f},{ -.17f,-.19f},{7.f,2.f},{7.f,2.f},{7.f,2.f},{.19f,-.17f},{5.f,8.f}, {.2f,-.35f},{6.f,9.f},{.35f,.2f},{8.f,3.f},{.14f,.08f},{9.f,4.f} } ; static complex ctrue6[80] /* was [8][5][2] */ = { {.1f,.1f},{1.f,2.f},{ 1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{.09f, -.12f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f} ,{3.f,4.f},{.03f,-.09f},{.15f,-.03f},{5.f,6.f},{5.f,6.f},{5.f,6.f} ,{5.f,6.f},{5.f,6.f},{5.f,6.f},{.03f,.03f},{-.18f,.03f},{.03f, -.09f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{.09f, .03f},{.15f,0.f},{0.f,.15f},{0.f,.06f},{2.f,3.f},{2.f,3.f},{2.f, 3.f},{2.f,3.f},{.1f,.1f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{ 4.f,5.f},{4.f,5.f},{4.f,5.f},{.09f,-.12f},{6.f,7.f},{6.f,7.f},{ 6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{.03f,-.09f},{ 8.f,9.f},{.15f,-.03f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{ 2.f,5.f},{.03f,.03f},{3.f,6.f},{-.18f,.03f},{4.f,7.f},{.03f,-.09f} ,{7.f,2.f},{7.f,2.f},{7.f,2.f},{.09f,.03f},{5.f,8.f},{.15f,0.f},{ 6.f,9.f},{0.f,.15f},{8.f,3.f},{0.f,.06f},{9.f,4.f} }; static integer itrue3[5] = { 0,1,2,2,2 }; static real sa = .3f; static complex ca = {.4f,-.7f}; static complex cv[80] /* was [8][5][2] */ = { {.1f,.1f},{1.f,2.f},{ 1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{.3f, -.4f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f}, {3.f,4.f},{.1f,-.3f},{.5f,-.1f},{5.f,6.f},{5.f,6.f},{5.f,6.f},{ 5.f,6.f},{5.f,6.f},{5.f,6.f},{.1f,.1f},{-.6f,.1f},{.1f,-.3f},{7.f, 8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{.3f,.1f},{.5f,0.f},{ 0.f,.5f},{0.f,.2f},{2.f,3.f},{2.f,3.f},{2.f,3.f},{2.f,3.f},{.1f, .1f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{ 4.f,5.f},{.3f,-.4f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f, 7.f},{6.f,7.f},{6.f,7.f},{.1f,-.3f},{8.f,9.f},{.5f,-.1f},{2.f,5.f} ,{2.f,5.f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{.1f,.1f},{3.f,6.f},{ -.6f,.1f},{4.f,7.f},{.1f,-.3f},{7.f,2.f},{7.f,2.f},{7.f,2.f},{.3f, .1f},{5.f,8.f},{.5f,0.f},{6.f,9.f},{0.f,.5f},{8.f,3.f},{0.f,.2f},{ 9.f,4.f} }; /* System generated locals */ integer i__1, i__2, i__3; real r__1; complex q__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; complex cx[8]; integer np1, len; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), ctest_(integer *, complex *, complex *, complex *, real *); complex mwpcs[5], mwpct[5]; extern real scnrm2_(integer *, complex *, integer *); extern /* Subroutine */ int itest1_(integer *, integer *), stest1_(real *, real *, real *, real *); extern integer icamax_(integer *, complex *, integer *); extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *); extern real scasum_(integer *, complex *, integer *); /* Fortran I/O blocks */ static cilist io___19 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ - 1; i__3 = i__ + (np1 + combla_1.incx * 5 << 3) - 49; cx[i__2].r = cv[i__3].r, cx[i__2].i = cv[i__3].i; /* L20: */ } if (combla_1.icase == 6) { /* .. SCNRM2 .. */ r__1 = scnrm2_(&combla_1.n, cx, &combla_1.incx); stest1_(&r__1, &strue2[np1 - 1], &strue2[np1 - 1], sfac); } else if (combla_1.icase == 7) { /* .. SCASUM .. */ r__1 = scasum_(&combla_1.n, cx, &combla_1.incx); stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); } else if (combla_1.icase == 8) { /* .. CSCAL .. */ cscal_(&combla_1.n, &ca, cx, &combla_1.incx); ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 9) { /* .. CSSCAL .. */ csscal_(&combla_1.n, &sa, cx, &combla_1.incx); ctest_(&len, cx, &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 10) { /* .. ICAMAX .. */ i__1 = icamax_(&combla_1.n, cx, &combla_1.incx); itest1_(&i__1, &itrue3[np1 - 1]); } else { s_wsle(&io___19); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } combla_1.incx = 1; if (combla_1.icase == 8) { /* CSCAL */ /* Add a test for alpha equal to zero. */ ca.r = 0.f, ca.i = 0.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0.f, mwpct[i__1].i = 0.f; i__1 = i__ - 1; mwpcs[i__1].r = 1.f, mwpcs[i__1].i = 1.f; /* L80: */ } cscal_(&c__5, &ca, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } else if (combla_1.icase == 9) { /* CSSCAL */ /* Add a test for alpha equal to zero. */ sa = 0.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0.f, mwpct[i__1].i = 0.f; i__1 = i__ - 1; mwpcs[i__1].r = 1.f, mwpcs[i__1].i = 1.f; /* L100: */ } csscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to one. */ sa = 1.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; mwpct[i__1].r = cx[i__2].r, mwpct[i__1].i = cx[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; mwpcs[i__1].r = cx[i__2].r, mwpcs[i__1].i = cx[i__2].i; /* L120: */ } csscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to minus one. */ sa = -1.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; q__1.r = -cx[i__2].r, q__1.i = -cx[i__2].i; mwpct[i__1].r = q__1.r, mwpct[i__1].i = q__1.i; i__1 = i__ - 1; i__2 = i__ - 1; q__1.r = -cx[i__2].r, q__1.i = -cx[i__2].i; mwpcs[i__1].r = q__1.r, mwpcs[i__1].i = q__1.i; /* L140: */ } csscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } return 0; } /* check1_ */ /* Subroutine */ int check2_(real *sfac) { /* Initialized data */ static complex ca = {.4f,-.7f}; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static complex cx1[7] = { {.7f,-.8f},{-.4f,-.7f},{-.1f,-.9f},{.2f,-.8f},{ -.9f,-.4f},{.1f,.4f},{-.6f,.6f} }; static complex cy1[7] = { {.6f,-.6f},{-.9f,.5f},{.7f,-.6f},{.1f,-.5f},{ -.1f,-.2f},{-.5f,-.3f},{.8f,-.7f} }; static complex ct8[112] /* was [7][4][4] */ = { {.6f,-.6f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.32f,-1.41f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.32f, -1.41f},{-1.55f,.5f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{.32f,-1.41f},{-1.55f,.5f},{.03f,-.89f},{-.38f,-.96f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{0.f,0.f},{.32f,-1.41f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{-.07f,-.89f},{-.9f,.5f},{ .42f,-1.41f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.78f,.06f},{ -.9f,.5f},{.06f,-.13f},{.1f,-.5f},{-.77f,-.49f},{-.5f,-.3f},{.52f, -1.51f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.32f,-1.41f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{-.07f,-.89f},{-1.18f,-.31f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.78f,.06f},{-1.54f,.97f},{ .03f,-.89f},{-.18f,-1.31f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f, -.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {.32f,-1.41f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{.32f,-1.41f},{-.9f,.5f},{.05f,-.6f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{.32f,-1.41f},{-.9f,.5f},{.05f,-.6f},{.1f, -.5f},{-.77f,-.49f},{-.5f,-.3f},{.32f,-1.16f} }; static complex ct7[16] /* was [4][4] */ = { {0.f,0.f},{-.06f,-.9f},{ .65f,-.47f},{-.34f,-1.22f},{0.f,0.f},{-.06f,-.9f},{-.59f,-1.46f},{ -1.04f,-.04f},{0.f,0.f},{-.06f,-.9f},{-.83f,.59f},{.07f,-.37f},{ 0.f,0.f},{-.06f,-.9f},{-.76f,-1.15f},{-1.33f,-1.82f} }; static complex ct6[16] /* was [4][4] */ = { {0.f,0.f},{.9f,.06f},{ .91f,-.77f},{1.8f,-.1f},{0.f,0.f},{.9f,.06f},{1.45f,.74f},{.2f, .9f},{0.f,0.f},{.9f,.06f},{-.55f,.23f},{.83f,-.39f},{0.f,0.f},{ .9f,.06f},{1.04f,.79f},{1.95f,1.22f} }; static complex ct10x[112] /* was [7][4][4] */ = { {.7f,-.8f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f}, {-.9f,.5f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f, -.6f},{-.9f,.5f},{.7f,-.6f},{.1f,-.5f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.7f,-.6f},{-.4f,-.7f},{.6f,-.6f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{.8f,-.7f},{-.4f,-.7f},{-.1f,-.2f},{.2f, -.8f},{.7f,-.6f},{.1f,.4f},{.6f,-.6f},{.7f,-.8f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{-.9f,.5f},{ -.4f,-.7f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ .1f,-.5f},{-.4f,-.7f},{.7f,-.6f},{.2f,-.8f},{-.9f,.5f},{.1f,.4f},{ .6f,-.6f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{.6f,-.6f},{.7f,-.6f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{.7f,-.6f},{-.1f,-.2f},{ .8f,-.7f},{0.f,0.f},{0.f,0.f},{0.f,0.f} }; static complex ct10y[112] /* was [7][4][4] */ = { {.6f,-.6f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f}, {-.4f,-.7f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ .7f,-.8f},{-.4f,-.7f},{-.1f,-.9f},{.2f,-.8f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{-.1f,-.9f},{-.9f,.5f},{.7f,-.8f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{-.6f,.6f},{-.9f,.5f},{-.9f,-.4f},{ .1f,-.5f},{-.1f,-.9f},{-.5f,-.3f},{.7f,-.8f},{.6f,-.6f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{-.1f,-.9f} ,{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ -.6f,.6f},{-.9f,-.4f},{-.1f,-.9f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{.7f,-.8f},{-.9f,.5f},{-.4f,-.7f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f},{-.9f,.5f},{-.4f,-.7f},{ .1f,-.5f},{-.1f,-.9f},{-.5f,-.3f},{.2f,-.8f} }; static complex csize1[4] = { {0.f,0.f},{.9f,.9f},{1.63f,1.73f},{2.9f, 2.78f} }; static complex csize3[14] = { {0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{1.17f,1.17f},{1.17f,1.17f},{1.17f, 1.17f},{1.17f,1.17f},{1.17f,1.17f},{1.17f,1.17f},{1.17f,1.17f} }; static complex csize2[14] /* was [7][2] */ = { {0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{1.54f,1.54f},{1.54f, 1.54f},{1.54f,1.54f},{1.54f,1.54f},{1.54f,1.54f},{1.54f,1.54f},{ 1.54f,1.54f} }; /* System generated locals */ integer i__1, i__2; complex q__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, ki, kn; complex cx[7], cy[7]; integer mx, my; complex cdot[1]; integer lenx, leny; extern /* Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL void cdotc_(complex *, #else complex cdotc_( #endif integer *, complex *, integer *, complex *, integer *); extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); extern /* Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL void cdotu_(complex *, #else complex cdotu_( #endif integer *, complex *, integer *, complex *, integer *); extern /* Subroutine */ int cswap_(integer *, complex *, integer *, complex *, integer *), ctest_(integer *, complex *, complex *, complex *, real *); integer ksize; extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, integer *, complex *, integer *); /* Fortran I/O blocks */ static cilist io___48 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; cx[i__1].r = cx1[i__2].r, cx[i__1].i = cx1[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; cy[i__1].r = cy1[i__2].r, cy[i__1].i = cy1[i__2].i; /* L20: */ } if (combla_1.icase == 1) { /* .. CDOTC .. */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL cdotc_(&q__1, #else q__1 = cdotc_( #endif &combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = q__1.r, cdot[0].i = q__1.i; ctest_(&c__1, cdot, &ct6[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. CDOTU .. */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL cdotu_(&q__1, #else q__1 = cdotu_( #endif &combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = q__1.r, cdot[0].i = q__1.i; ctest_(&c__1, cdot, &ct7[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 3) { /* .. CAXPY .. */ caxpy_(&combla_1.n, &ca, cx, &combla_1.incx, cy, & combla_1.incy); ctest_(&leny, cy, &ct8[(kn + (ki << 2)) * 7 - 35], &csize2[ ksize * 7 - 7], sfac); } else if (combla_1.icase == 4) { /* .. CCOPY .. */ ccopy_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else if (combla_1.icase == 5) { /* .. CSWAP .. */ cswap_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&lenx, cx, &ct10x[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else { s_wsle(&io___48); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } return 0; } /* check2_ */ /* Subroutine */ int stest_(integer *len, real *scomp, real *strue, real * ssize, real *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE I " " \002,\002 COMP(I) TRU" "E(I) DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,i3,2e36.8,2e12.4)"; /* System generated locals */ integer i__1; real r__1, r__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; real sd; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___51 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___52 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___53 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((r__2 = *sfac * sd, abs(r__2)) <= (r__1 = ssize[i__], abs(r__1)) * s_epsilon_(&c_b52)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___51); e_wsfe(); s_wsfe(&io___52); e_wsfe(); L20: s_wsfe(&io___53); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(real)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int stest1_(real *scomp1, real *strue1, real *ssize, real * sfac) { real scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ real sdiff_(real *sa, real *sb) { /* System generated locals */ real ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int ctest_(integer *len, complex *ccomp, complex *ctrue, complex *csize, real *sfac) { /* System generated locals */ integer i__1, i__2; /* Builtin functions */ double r_imag(complex *); /* Local variables */ integer i__; real scomp[20], ssize[20], strue[20]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* **************************** CTEST ***************************** */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --csize; --ctrue; --ccomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; scomp[(i__ << 1) - 2] = ccomp[i__2].r; scomp[(i__ << 1) - 1] = r_imag(&ccomp[i__]); i__2 = i__; strue[(i__ << 1) - 2] = ctrue[i__2].r; strue[(i__ << 1) - 1] = r_imag(&ctrue[i__]); i__2 = i__; ssize[(i__ << 1) - 2] = csize[i__2].r; ssize[(i__ << 1) - 1] = r_imag(&csize[i__]); /* L20: */ } i__1 = *len << 1; stest_(&i__1, scomp, strue, ssize, sfac); return 0; } /* ctest_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE " " \002,\002 COMP TRU" "E DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___60 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___61 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___63 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___60); e_wsfe(); s_wsfe(&io___61); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___63); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int cblat1_ () { main (); return 0; } blis-1.1/blastest/src/cblat2.c000066400000000000000000004766641474157777200163030ustar00rootroot00000000000000/* cblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static integer c__6 = 6; static real c_b122 = 0.f; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; /* > \brief \b CBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM CBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 17 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 35 lines: */ /* > 'cblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > CGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTBSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CGERC T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CGERU T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHER2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "cblat2"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*17] = "CGEMV " "CGBMV " "CHEMV " "CHBMV " "CHPMV " "CTRMV " "CTBMV " "CTPMV " "CTRSV " "CTBSV " "CTPSV " "CGERC " "CGERU " "CHER " "CHPR " "CHER2 " "CHPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE COMPLEX LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9988[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9985[] = "(\002 ERROR IN CMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 CMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ complex a[4225] /* was [65][65] */; real g[65]; integer i__, j, n; complex x[65], y[65], z__[130], aa[4225]; integer kb[7]; complex as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7]; extern logical lce_(complex *, complex *, integer *); integer inc[7], nkb; complex bet[7]; real eps, err; integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk4_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk6_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchke_(integer * , char *, integer *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; logical ltest[17], sfatal; char snamet[6]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 17; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 17; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b122); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of CMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); a[i__3].r = (real) i__4, a[i__3].i = 0.f; /* L110: */ } i__2 = j - 1; x[i__2].r = (real) j, x[i__2].i = 0.f; i__2 = j - 1; y[i__2].r = 0.f, y[i__2].i = 0.f; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; yy[i__2].r = (real) i__3, yy[i__2].i = 0.f; /* L130: */ } /* YY holds the exact result. On exit from CMVCH YT holds */ /* the result computed by CMVCH. */ *(unsigned char *)trans = 'N'; cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lce_(yy, yt, &n); if (! same || err != 0.f) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lce_(yy, yt, &n); if (! same || err != 0.f) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 17; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { cchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L170; case 14: goto L180; case 15: goto L180; case 16: goto L190; case 17: goto L190; } /* Test CGEMV, 01, and CGBMV, 02. */ L140: cchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test CHEMV, 03, CHBMV, 04, and CHPMV, 05. */ L150: cchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test CTRMV, 06, CTBMV, 07, CTPMV, 08, */ /* CTRSV, 09, CTBSV, 10, and CTPSV, 11. */ L160: cchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test CGERC, 12, CGERU, 13. */ L170: cchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test CHER, 14, and CHPR, 15. */ L180: cchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test CHER2, 16, and CHPR2, 17. */ L190: cchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of CBLAT2. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int cchk1_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als, bls; real err; integer iku, kls, kus; complex beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer * , integer *, complex *, complex *, integer *, complex *, integer * , complex *, complex *, integer *, ftnlen), cgemv_(char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cmvch_(char * , integer *, integer *, complex *, complex *, integer *, complex * , integer *, complex *, complex *, integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; char trans[1]; logical banded; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CGEMV and CGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; cmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__4 = abs(incx); i__5 = nl - 1; cmake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { i__4 = nl / 2; x[i__4].r = 0.f, x[i__4].i = 0.f; i__4 = abs(incx) * (nl / 2 - 1) + 1; xx[i__4].r = 0.f, xx[i__4].i = 0.f; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7] .i; /* Generate the vector Y. */ transl.r = 0.f, transl.i = 0.f; i__7 = abs(incy); i__8 = ml - 1; cmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lce_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lce_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lceres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lce_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lce_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lce_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lceres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. */ cmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L140; L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of CCHK1. */ } /* cchk1_ */ /* Subroutine */ int cchk2_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3,\002, X,\002," "i2,\002,(\002,f4.1,\002,\002,f4.1,\002), \002,\002Y,\002,i2,\002" ") .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), AP, X,\002,i2,\002,(" "\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2,\002) " ".\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als, bls; real err; complex beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int chbmv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, ftnlen), chemv_(char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int chpmv_(char *, integer *, complex *, complex * , complex *, integer *, complex *, complex *, integer *, ftnlen); logical reset; integer incxs, incys; char uplos[1]; logical banded, packed; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CHEMV, CHBMV and CHPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__4 = abs(incx); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0.f, x[i__4].i = 0.f; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0.f, xx[i__4].i = 0.f; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7].i; /* Generate the vector Y. */ transl.r = 0.f, transl.i = 0.f; i__7 = abs(incy); i__8 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[ i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[ i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[ i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chemv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chbmv_(uplo, &n, &k, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chpmv_(uplo, &n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lce_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[8] = lce_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lceres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lce_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lce_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lceres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&as[1], &aa[1], &laa); isame[4] = lce_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[7] = lce_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lceres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ cmvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK2. */ } /* cchk2_ */ /* Subroutine */ int cchk3_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *xt, real *g, complex *z__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda; extern logical lce_(complex *, complex *, integer *); integer ict, icu; real err; char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); char diags[1]; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int ctbmv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctbsv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); logical reset; integer incxs; char trans[1]; extern /* Subroutine */ int ctpmv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_( char *, char *, char *, integer *, complex *, integer *, complex * , integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CTRMV, CTBMV, CTPMV, CTRSV, CTBSV and CTPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.f; /* Set up zero vector for CMVCH. */ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; z__[i__2].r = 0.f, z__[i__2].i = 0.f; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; cmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__4 = abs(incx); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0.f, x[i__4].i = 0.f; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0.f, xx[i__4].i = 0.f; } ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6] .i; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6] .i; /* L30: */ } incxs = incx; /* Call the subroutine. */ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctrmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctrsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lce_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lceres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lce_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lce_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lceres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lce_(&as[1], &aa[1], &laa); if (null) { isame[5] = lce_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lceres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. */ cmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &x[1], &incx, &c_b1, &z__[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. */ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = (i__ - 1) * abs(incx) + 1; z__[i__5].r = xx[i__6].r, z__[i__5].i = xx[i__6].i; i__5 = (i__ - 1) * abs(incx) + 1; i__6 = i__; xx[i__5].r = x[i__6].r, xx[i__5].i = x[i__6].i; /* L50: */ } cmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &z__[1], &incx, &c_b1, &x[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK3. */ } /* cchk3_ */ /* Subroutine */ int cchk4_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002," "\002),\002(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y," "\002,i2,\002, A,\002,i3,\002) \002,\002 " ".\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, m, n; complex w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als; real err; integer ldas; logical same, conj; integer incx, incy; logical null; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen), cgerc_( integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen), cgeru_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); integer nargs; logical reset; integer incxs, incys; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___285 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___289 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___297 = { 0, 0, 0, fmt_9994, 0 }; /* Tests CGERC and CGERU. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ conj = *(unsigned char *)&sname[4] == 'C'; /* Define the number of arguments. */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__3 = abs(incx); i__4 = m - 1; cmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { i__3 = m / 2; x[i__3].r = 0.f, x[i__3].i = 0.f; i__3 = abs(incx) * (m / 2 - 1) + 1; xx[i__3].r = 0.f, xx[i__3].i = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl.r = 0.f, transl.i = 0.f; i__4 = abs(incy); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0.f, y[i__4].i = 0.f; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0.f, yy[i__4].i = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; i__5 = m - 1; i__6 = n - 1; cmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (*trace) { io___285.ciunit = *ntra; s_wsfe(&io___285); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (conj) { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgerc_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } else { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgeru_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___286.ciunit = *nout; s_wsfe(&io___286); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lce_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lce_(&as[1], &aa[1], &laa); } else { isame[7] = lceres_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = m - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { i__6 = j; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } else { i__6 = n - j + 1; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } if (conj) { r_cnjg(&q__1, w); w[0].r = q__1.r, w[0].i = q__1.i; } cmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b2, &a[j * a_dim1 + 1], & c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L150; L130: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___297.ciunit = *nout; s_wsfe(&io___297); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of CCHK4. */ } /* cchk4_ */ /* Subroutine */ int cchk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; complex q__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, n; complex w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; extern logical lce_(complex *, complex *, integer *); real err; extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, integer *, complex *, integer *, ftnlen); integer ldas; logical same; extern /* Subroutine */ int chpr_(char *, integer *, real *, complex *, integer *, complex *, ftnlen); real rals; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs; logical upper; char uplos[1]; logical packed; real ralpha; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___326 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___327 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___328 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___331 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___342 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___343 = { 0, 0, 0, fmt_9994, 0 }; /* Tests CHER and CHPR. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__3 = abs(incx); i__4 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0.f, x[i__3].i = 0.f; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0.f, xx[i__3].i = 0.f; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; ralpha = alf[i__4].r; q__1.r = ralpha, q__1.i = 0.f; alpha.r = q__1.r, alpha.i = q__1.i; null = n <= 0 || ralpha == 0.f; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; i__4 = n - 1; i__5 = n - 1; cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; rals = ralpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6].i; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6].i; /* L20: */ } incxs = incx; /* Call the subroutine. */ if (full) { if (*trace) { io___326.ciunit = *ntra; s_wsfe(&io___326); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___327.ciunit = *ntra; s_wsfe(&io___327); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chpr_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___328.ciunit = *nout; s_wsfe(&io___328); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = rals == ralpha; isame[3] = lce_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lce_(&as[1], &aa[1], &laa); } else { isame[5] = lceres_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___331.ciunit = *nout; s_wsfe(&io___331); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = n - i__ + 1; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { r_cnjg(&q__1, &z__[j]); w[0].r = q__1.r, w[0].i = q__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } cmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___342.ciunit = *nout; s_wsfe(&io___342); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK5. */ } /* cchk5_ */ /* Subroutine */ int cchk6_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, A,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, AP) \002,\002 .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1, q__2, q__3; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, n; complex w[2]; integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als; real err; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * , integer *, complex *, integer *, complex *, integer *, ftnlen), chpr2_(char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, ftnlen), cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; logical upper; char uplos[1]; logical packed; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___380 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___391 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___392 = { 0, 0, 0, fmt_9994, 0 }; /* Tests CHER2 and CHPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__3 = abs(incx); i__4 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0.f, x[i__3].i = 0.f; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0.f, xx[i__3].i = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl.r = 0.f, transl.i = 0.f; i__4 = abs(incy); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0.f, y[i__4].i = 0.f; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0.f, yy[i__4].i = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; null = n <= 0 || alpha.r == 0.f && alpha.i == 0.f; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; i__5 = n - 1; i__6 = n - 1; cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___375.ciunit = *ntra; s_wsfe(&io___375); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cher2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___376.ciunit = *ntra; s_wsfe(&io___376); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chpr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___377.ciunit = *nout; s_wsfe(&io___377); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lce_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lce_(&as[1], &aa[1], &laa); } else { isame[7] = lceres_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___380.ciunit = *nout; s_wsfe(&io___380); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = n - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = i__; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = n - i__ + 1; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L80: */ } } ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { r_cnjg(&q__2, &z__[j + (z_dim1 << 1)]); q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, q__1.i = alpha.r * q__2.i + alpha.i * q__2.r; w[0].r = q__1.r, w[0].i = q__1.i; r_cnjg(&q__2, &alpha); r_cnjg(&q__3, &z__[j + z_dim1]); q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = q__2.r * q__3.i + q__2.i * q__3.r; w[1].r = q__1.r, w[1].i = q__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } cmvch_("N", &lj, &c__2, &c_b2, &z__[jj + z_dim1], nmax, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], & aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. */ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. */ if (errmax < *thresh) { io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L170; L150: io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___391.ciunit = *nout; s_wsfe(&io___391); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___392.ciunit = *nout; s_wsfe(&io___392); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of CCHK6. */ } /* cchk6_ */ /* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ complex a[1] /* was [1][1] */, x[1], y[1], beta; extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, integer *, complex *, integer *, ftnlen), chpr_(char *, integer *, real *, complex *, integer *, complex *, ftnlen), cher2_(char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, ftnlen), chpr2_(char *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, ftnlen), cgerc_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); complex alpha; extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer * , integer *, complex *, complex *, integer *, complex *, integer * , complex *, complex *, integer *, ftnlen), chbmv_(char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cgemv_(char * , integer *, integer *, complex *, complex *, integer *, complex * , integer *, complex *, complex *, integer *, ftnlen), chemv_( char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cgeru_( integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *), ctbmv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), chpmv_(char *, integer *, complex *, complex *, complex *, integer *, complex *, complex *, integer *, ftnlen), ctbsv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctpmv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrsv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); real ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___399 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; case 17: goto L170; } L10: infoc_1.infot = 1; cgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; cgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; cgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L20: infoc_1.infot = 1; cgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L30: infoc_1.infot = 1; chemv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chemv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; chemv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; chemv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L40: infoc_1.infot = 1; chbmv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; chbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; chbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; chbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L50: infoc_1.infot = 1; chpmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chpmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; chpmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chpmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L60: infoc_1.infot = 1; ctrmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrmv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ctrmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L70: infoc_1.infot = 1; ctbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L80: infoc_1.infot = 1; ctpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L90: infoc_1.infot = 1; ctrsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ctrsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L100: infoc_1.infot = 1; ctbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L110: infoc_1.infot = 1; ctpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L120: infoc_1.infot = 1; cgerc_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgerc_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgerc_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cgerc_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cgerc_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L130: infoc_1.infot = 1; cgeru_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgeru_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgeru_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cgeru_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cgeru_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L140: infoc_1.infot = 1; cher_("/", &c__0, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cher_("U", &c_n1, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cher_("U", &c__0, &ralpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher_("U", &c__2, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L150: infoc_1.infot = 1; chpr_("/", &c__0, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chpr_("U", &c_n1, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; chpr_("U", &c__0, &ralpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L160: infoc_1.infot = 1; cher2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cher2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cher2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L170: infoc_1.infot = 1; chpr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chpr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; chpr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chpr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L180: if (infoc_1.ok) { io___399.ciunit = *nout; s_wsfe(&io___399); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___400.ciunit = *nout; s_wsfe(&io___400); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of CCHKE. */ } /* cchke_ */ /* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, complex *a, integer *nmax, complex *aa, integer *lda, integer *kl, integer *ku, logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; real r__1; complex q__1, q__2; /* Builtin functions */ void r_cnjg(complex *, complex *); integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, jj, kk; logical gen, tri, sym; extern /* Complex */ void cbeg_(complex *, logical *); integer ibeg, iend, ioff; logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'H'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { i__3 = i__ + j * a_dim1; cbeg_(&q__2, reset); q__1.r = q__2.r + transl->r, q__1.i = q__2.i + transl->i; a[i__3].r = q__1.r, a[i__3].i = q__1.i; } else { i__3 = i__ + j * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } if (i__ != j) { if (sym) { i__3 = j + i__ * a_dim1; r_cnjg(&q__1, &a[i__ + j * a_dim1]); a[i__3].r = q__1.r, a[i__3].i = q__1.i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } } } /* L10: */ } if (sym) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; r__1 = a[i__3].r; q__1.r = r__1, q__1.i = 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; q__1.r = a[i__3].r + 1.f, q__1.i = a[i__3].i + 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1.f, a[i__2].i = 0.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { i__3 = i1 + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { i__3 = i2 + (j - 1) * *lda; i__4 = i2 + j - *ku - 1 + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { i__3 = i3 + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L120: */ } if (sym) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; r__1 = aa[i__3].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__2].r = q__1.r, aa[i__2].i = q__1.i; } /* L130: */ } } else if (s_cmp(type__, "HB", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j - kk + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L160: */ } if (sym) { jj = kk + (j - 1) * *lda; i__2 = jj; i__3 = jj; r__1 = aa[i__3].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__2].r = q__1.r, aa[i__2].i = q__1.i; } /* L170: */ } } else if (s_cmp(type__, "HP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; i__3 = ioff; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; if (i__ == j) { if (unit) { i__3 = ioff; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; } if (sym) { i__3 = ioff; i__4 = ioff; r__1 = aa[i__4].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__3].r = q__1.r, aa[i__3].i = q__1.i; } } /* L180: */ } /* L190: */ } } return 0; /* End of CMAKE. */ } /* cmake_ */ /* Subroutine */ int cmvch_(char *trans, integer *m, integer *n, complex * alpha, complex *a, integer *nmax, complex *x, integer *incx, complex * beta, complex *y, integer *incy, complex *yt, real *g, complex *yy, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; real r__1, r__2, r__3, r__4, r__5, r__6; complex q__1, q__2, q__3; /* Builtin functions */ double r_imag(complex *); void r_cnjg(complex *, complex *); double c_abs(const complex *), sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; real erri; logical tran, ctran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___430 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___431 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___432 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. Statement Function definitions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; --yt; --g; --yy; /* Function Body */ tran = *(unsigned char *)trans == 'T'; ctran = *(unsigned char *)trans == 'C'; if (tran || ctran) { ml = *n; nl = *m; } else { ml = *m; nl = *n; } if (*incx < 0) { kx = nl; incxl = -1; } else { kx = 1; incxl = 1; } if (*incy < 0) { ky = ml; incyl = -1; } else { ky = 1; incyl = 1; } /* Compute expected result in YT using data in A, X and Y. */ /* Compute gauges in G. */ iy = ky; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; yt[i__2].r = 0.f, yt[i__2].i = 0.f; g[iy] = 0.f; jx = kx; if (tran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { i__3 = iy; i__4 = iy; i__5 = j + i__ * a_dim1; i__6 = jx; q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, q__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i; yt[i__3].r = q__1.r, yt[i__3].i = q__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4))); jx += incxl; /* L10: */ } } else if (ctran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { i__3 = iy; i__4 = iy; r_cnjg(&q__3, &a[j + i__ * a_dim1]); i__5 = jx; q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, q__2.i = q__3.r * x[i__5].i + q__3.i * x[i__5].r; q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i; yt[i__3].r = q__1.r, yt[i__3].i = q__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4))); jx += incxl; /* L20: */ } } else { i__2 = nl; for (j = 1; j <= i__2; ++j) { i__3 = iy; i__4 = iy; i__5 = i__ + j * a_dim1; i__6 = jx; q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, q__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i; yt[i__3].r = q__1.r, yt[i__3].i = q__1.i; i__3 = i__ + j * a_dim1; i__4 = jx; g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[ i__ + j * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r, abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4))); jx += incxl; /* L30: */ } } i__2 = iy; i__3 = iy; q__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, q__2.i = alpha->r * yt[i__3].i + alpha->i * yt[i__3].r; i__4 = iy; q__3.r = beta->r * y[i__4].r - beta->i * y[i__4].i, q__3.i = beta->r * y[i__4].i + beta->i * y[i__4].r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; yt[i__2].r = q__1.r, yt[i__2].i = q__1.i; i__2 = iy; g[iy] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), abs( r__2))) * g[iy] + ((r__3 = beta->r, abs(r__3)) + (r__4 = r_imag(beta), abs(r__4))) * ((r__5 = y[i__2].r, abs(r__5)) + ( r__6 = r_imag(&y[iy]), abs(r__6))); iy += incyl; /* L40: */ } /* Compute the error ratio for this result. */ *err = 0.f; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = (i__ - 1) * abs(*incy) + 1; q__1.r = yt[i__2].r - yy[i__3].r, q__1.i = yt[i__2].i - yy[i__3].i; erri = c_abs(&q__1) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L60; } /* L50: */ } /* If the loop completes, all results are at least half accurate. */ goto L80; /* Report fatal error. */ L60: *fatal = TRUE_; io___430.ciunit = *nout; s_wsfe(&io___430); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___431.ciunit = *nout; s_wsfe(&io___431); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(real)); do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(real)); e_wsfe(); } else { io___432.ciunit = *nout; s_wsfe(&io___432); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(real)); do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L70: */ } L80: return 0; /* End of CMVCH. */ } /* cmvch_ */ logical lce_(complex *ri, complex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LCE. */ } /* lce_ */ logical lceres_(char *type__, char *uplo, integer *m, integer *n, complex *aa, complex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'HE' or 'HP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LCERES. */ } /* lceres_ */ /* Complex */ void cbeg_(complex * ret_val, logical *reset) { /* System generated locals */ real r__1, r__2; complex q__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } r__1 = (i__ - 500) / 1001.f; r__2 = (j - 500) / 1001.f; q__1.r = r__1, q__1.i = r__2; ret_val->r = q__1.r, ret_val->i = q__1.i; return ; /* End of CBEG. */ } /* cbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. */ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___444 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___444.ciunit = *nout; s_wsfe(&io___444); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___445 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___446 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___447 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___445.ciunit = infoc_2.nout; s_wsfe(&io___445); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___446.ciunit = infoc_2.nout; s_wsfe(&io___446); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___447.ciunit = infoc_2.nout; s_wsfe(&io___447); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int cblat2_ () { main (); return 0; } blis-1.1/blastest/src/cblat3.c000066400000000000000000005735771474157777200163050ustar00rootroot00000000000000/* cblat3.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static integer c__6 = 6; static integer c__2 = 2; static real c_b86 = 0.f; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; /* > \brief \b CBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM CBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 9 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 23 lines: */ /* > 'cblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > CGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHERK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHER2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "cblat3"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*9] = "CGEMM " "CHEMM " "CSYMM " "CTRMM " "CTRSM " "CHERK " "CSYRK " "CHER2K" "CSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE COMPLEX LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9992[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9989[] = "(\002 ERROR IN CMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 CMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ complex c__[4225] /* was [65][65] */; real g[65]; integer i__, j, n; complex w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[ 4225], as[4225], bs[4225], cs[4225], ct[65], alf[7]; extern logical lce_(complex *, complex *, integer *); complex bet[7]; real eps, err; integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk4_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchke_(integer *, char *, integer *, ftnlen); logical fatal; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); logical trace; integer nidim; char snaps[32]; integer isnum; logical ltest[9], sfatal; char snamet[6], transa[1], transb[1]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Report values of parameters. */ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 9; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 9; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b86); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of CMMCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); ab[i__3].r = (real) i__4, ab[i__3].i = 0.f; /* L90: */ } i__2 = j + 4224; ab[i__2].r = (real) j, ab[i__2].i = 0.f; i__2 = (j + 65) * 65 - 65; ab[i__2].r = (real) j, ab[i__2].i = 0.f; i__2 = j - 1; c__[i__2].r = 0.f, c__[i__2].i = 0.f; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (real) i__3, cc[i__2].i = 0.f; /* L110: */ } /* CC holds the exact result. On exit from CMMCH CT holds */ /* the result computed by CMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j + 4224; i__3 = n - j + 1; ab[i__2].r = (real) i__3, ab[i__2].i = 0.f; i__2 = (j + 65) * 65 - 65; i__3 = n - j + 1; ab[i__2].r = (real) i__3, ab[i__2].i = 0.f; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n - j; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (real) i__3, cc[i__2].i = 0.f; /* L130: */ } *(unsigned char *)transa = 'C'; *(unsigned char *)transb = 'N'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 9; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { cchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L150; case 4: goto L160; case 5: goto L160; case 6: goto L170; case 7: goto L170; case 8: goto L180; case 9: goto L180; } /* Test CGEMM, 01. */ L140: cchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CHEMM, 02, CSYMM, 03. */ L150: cchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CTRMM, 04, CTRSM, 05. */ L160: cchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test CHERK, 06, CSYRK, 07. */ L170: cchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CHER2K, 08, CSYR2K, 09. */ L180: cchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; L190: if (fatal && sfatal) { goto L210; } } /* L200: */ } io___78.ciunit = nout; s_wsfe(&io___78); e_wsfe(); goto L230; L210: io___79.ciunit = nout; s_wsfe(&io___79); e_wsfe(); goto L230; L220: io___80.ciunit = nout; s_wsfe(&io___80); e_wsfe(); L230: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of CBLAT3. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int cchk1_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex * as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1," "\002), A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1" ",\002), C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; extern logical lce_(complex *, complex *, integer *); complex als, bls; real err; complex beta; integer ldas, ldbs, ldcs; logical same, null; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical * , integer *, logical *, ftnlen, ftnlen); logical isame[13], trana, tranb; integer nargs; logical reset; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char tranas[1], tranbs[1], transa[1], transb[1]; real errmax; /* Fortran I/O blocks */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ cmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ cmake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b1, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { i__6 = ib; beta.r = bet[i__6].r, beta.i = bet[i__6].i; /* Generate the matrix C. */ cmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; as[i__7].r = aa[i__8].r, as[i__7].i = aa[ i__8].i; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; bs[i__7].r = bb[i__8].r, bs[i__7].i = bb[ i__8].i; /* L20: */ } ldbs = ldb; bls.r = beta.r, bls.i = beta.i; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; cs[i__7].r = cc[i__8].r, cs[i__7].i = cc[ i__8].i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lce_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lce_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lce_(&cs[1], &cc[1], &lcc); } else { isame[11] = lceres_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ cmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result. */ if (errmax < *thresh) { io___130.ciunit = *nout; s_wsfe(&io___130); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___131.ciunit = *nout; s_wsfe(&io___131); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___132.ciunit = *nout; s_wsfe(&io___132); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___133.ciunit = *nout; s_wsfe(&io___133); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of CCHK1. */ } /* cchk1_ */ /* Subroutine */ int cchk2_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex * as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char ichs[2] = "LR"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)" ", C,\002,i3,\002) .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lce_(complex *, complex *, integer *); integer ics; complex als, bls; integer icu; real err; complex beta; integer ldas, ldbs, ldcs; logical same; char side[1]; logical conj, left, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen), chemm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; extern /* Subroutine */ int csymm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char uplos[1]; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; /* Fortran I/O blocks */ static cilist io___172 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___173 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___176 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___178 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___179 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___180 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___181 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CHEMM and CSYMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 12; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L90; } lcc = ldc * n; null = n <= 0 || m <= 0; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L90; } lbb = ldb * n; /* Generate the matrix B. */ cmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, & reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; /* Generate the hermitian or symmetric matrix A. */ cmake_(sname + 1, uplo, " ", &na, &na, &a[a_offset], nmax, &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; /* Generate the matrix C. */ cmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *)side; *(unsigned char *)uplos = *(unsigned char *)uplo; ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; bls.r = beta.r, bls.i = beta.i; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___172.ciunit = *ntra; s_wsfe(&io___172); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } if (conj) { chemm_(side, uplo, &m, &n, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { csymm_(side, uplo, &m, &n, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___173.ciunit = *nout; s_wsfe(&io___173); e_wsfe(); *fatal = TRUE_; goto L110; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *(unsigned char *)side; isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lce_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lce_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[10] = lce_(&cs[1], &cc[1], &lcc); } else { isame[10] = lceres_("GE", " ", &m, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___176.ciunit = *nout; s_wsfe(&io___176); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L110; } if (! null) { /* Check the result. */ if (left) { cmmch_("N", "N", &m, &n, &m, &alpha, &a[ a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { cmmch_("N", "N", &m, &n, &n, &alpha, &b[ b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } L90: ; } /* L100: */ } /* Report result. */ if (errmax < *thresh) { io___178.ciunit = *nout; s_wsfe(&io___178); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___179.ciunit = *nout; s_wsfe(&io___179); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L120; L110: io___180.ciunit = *nout; s_wsfe(&io___180); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___181.ciunit = *nout; s_wsfe(&io___181); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L120: return 0; /* End of CCHK2. */ } /* cchk2_ */ /* Subroutine */ int cchk3_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nmax, complex *a, complex *aa, complex *as, complex *b, complex *bb, complex *bs, complex *ct, real *g, complex *c__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; static char ichs[2] = "LR"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb; extern logical lce_(complex *, complex *, integer *); integer ics; complex als; integer ict, icu; real err; char diag[1]; integer ldas, ldbs; logical same; char side[1]; logical left, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; char diags[1]; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ctrsm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char uplos[1]; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char tranas[1], transa[1]; real errmax; /* Fortran I/O blocks */ static cilist io___222 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___223 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___224 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___227 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___229 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___230 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___231 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___232 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CTRMM and CTRSM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --g; --ct; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 11; nc = 0; reset = TRUE_; errmax = 0.f; /* Set up zero matrix for CMMCH. */ i__1 = *nmax; for (j = 1; j <= i__1; ++j) { i__2 = *nmax; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L10: */ } /* L20: */ } i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L130; } lbb = ldb * n; null = m <= 0 || n <= 0; for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L130; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)transa = *(unsigned char *)&icht[ ict - 1]; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[ icd - 1]; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; /* Generate the matrix A. */ cmake_("TR", uplo, diag, &na, &na, &a[ a_offset], nmax, &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); /* Generate the matrix B. */ cmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *) side; *(unsigned char *)uplos = *(unsigned char *) uplo; *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)diags = *(unsigned char *) diag; ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[ i__6].i; /* L30: */ } ldas = lda; i__4 = lbb; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; bs[i__5].r = bb[i__6].r, bs[i__5].i = bb[ i__6].i; /* L40: */ } ldbs = ldb; /* Call the subroutine. */ if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen) 2) == 0) { if (*trace) { io___222.ciunit = *ntra; s_wsfe(&io___222); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { if (*trace) { io___223.ciunit = *ntra; s_wsfe(&io___223); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___224.ciunit = *nout; s_wsfe(&io___224); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *( unsigned char *)side; isame[1] = *(unsigned char *)uplos == *( unsigned char *)uplo; isame[2] = *(unsigned char *)tranas == *( unsigned char *)transa; isame[3] = *(unsigned char *)diags == *( unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; isame[6] = als.r == alpha.r && als.i == alpha.i; isame[7] = lce_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; if (null) { isame[9] = lce_(&bs[1], &bb[1], &lbb); } else { isame[9] = lceres_("GE", " ", &m, &n, &bs[ 1], &bb[1], &ldb, (ftnlen)2, ( ftnlen)1); } isame[10] = ldbs == ldb; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___227.ciunit = *nout; s_wsfe(&io___227); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L50: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { if (s_cmp(sname + 3, "MM", (ftnlen)2, ( ftnlen)2) == 0) { /* Check the result. */ if (left) { cmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { cmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } } else if (s_cmp(sname + 3, "SM", (ftnlen) 2, (ftnlen)2) == 0) { /* Compute approximation to original */ /* matrix. */ i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + j * c_dim1; i__7 = i__ + (j - 1) * ldb; c__[i__6].r = bb[i__7].r, c__[i__6].i = bb[i__7].i; i__6 = i__ + (j - 1) * ldb; i__7 = i__ + j * b_dim1; q__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, q__1.i = alpha.r * b[i__7].i + alpha.i * b[ i__7].r; bb[i__6].r = q__1.r, bb[i__6].i = q__1.i; /* L60: */ } /* L70: */ } if (left) { cmmch_(transa, "N", &m, &n, &m, & c_b2, &a[a_offset], nmax, &c__[c_offset], nmax, & c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { cmmch_("N", transa, &m, &n, &n, & c_b2, &c__[c_offset], nmax, &a[a_offset], nmax, &c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L150; } } /* L80: */ } /* L90: */ } /* L100: */ } /* L110: */ } /* L120: */ } L130: ; } /* L140: */ } /* Report result. */ if (errmax < *thresh) { io___229.ciunit = *nout; s_wsfe(&io___229); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___230.ciunit = *nout; s_wsfe(&io___230); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L160; L150: io___231.ciunit = *nout; s_wsfe(&io___231); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___232.ciunit = *nout; s_wsfe(&io___232); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of CCHK3. */ } /* cchk3_ */ /* Subroutine */ int cchk4_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex * as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char icht[2] = "NC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1," "\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" " , A,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002), C,\002,i3," "\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lda, lcc, ldc; extern logical lce_(complex *, complex *, integer *); complex als; integer ict, icu; real err; complex beta; integer ldas, ldcs; logical same, conj; complex bets; real rals; logical tran, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen), cherk_(char *, char *, integer *, integer *, real *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen); real rbeta; logical isame[13]; integer nargs; real rbets; logical reset; char trans[1]; logical upper; extern /* Subroutine */ int csyrk_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); char uplos[1]; real ralpha; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___274 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___275 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___276 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___279 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___287 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___288 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___289 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___290 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___291 = { 0, 0, 0, fmt_9993, 0 }; /* Tests CHERK and CSYRK. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 10; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ cmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; if (conj) { ralpha = alpha.r; q__1.r = ralpha, q__1.i = 0.f; alpha.r = q__1.r, alpha.i = q__1.i; } i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; if (conj) { rbeta = beta.r; q__1.r = rbeta, q__1.i = 0.f; beta.r = q__1.r, beta.i = q__1.i; } null = n <= 0; if (conj) { null = null || (k <= 0 || ralpha == 0.f) && rbeta == 1.f; } /* Generate the matrix C. */ cmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; if (conj) { rals = ralpha; } else { als.r = alpha.r, als.i = alpha.i; } i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L20: */ } ldcs = ldc; /* Call the subroutine. */ if (conj) { if (*trace) { io___274.ciunit = *ntra; s_wsfe(&io___274); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cherk_(uplo, trans, &n, &k, &ralpha, &aa[1], & lda, &rbeta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___275.ciunit = *ntra; s_wsfe(&io___275); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } csyrk_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &beta, &cc[1], &ldc, (ftnlen)1, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___276.ciunit = *nout; s_wsfe(&io___276); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; if (conj) { isame[4] = rals == ralpha; } else { isame[4] = als.r == alpha.r && als.i == alpha.i; } isame[5] = lce_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (conj) { isame[7] = rbets == rbeta; } else { isame[7] = bets.r == beta.r && bets.i == beta.i; } if (null) { isame[8] = lce_(&cs[1], &cc[1], &lcc); } else { isame[8] = lceres_(sname + 1, uplo, &n, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[9] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___279.ciunit = *nout; s_wsfe(&io___279); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { cmmch_(transt, "N", &lj, &c__1, &k, & alpha, &a[jj * a_dim1 + 1], nmax, &a[j * a_dim1 + 1], nmax, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { cmmch_("N", transt, &lj, &c__1, &k, & alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } /* L40: */ } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___286.ciunit = *nout; s_wsfe(&io___286); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___287.ciunit = *nout; s_wsfe(&io___287); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: if (n > 1) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L120: io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___290.ciunit = *nout; s_wsfe(&io___290); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___291.ciunit = *nout; s_wsfe(&io___291); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK4. */ } /* cchk4_ */ /* Subroutine */ int cchk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * nbet, complex *bet, integer *nmax, complex *ab, complex *aa, complex * as, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs, complex *ct, real *g, complex *w, ftnlen sname_len) { /* Initialized data */ static char icht[2] = "NC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,\002,f4.1,\002, C,\002,i3,\002)" " .\002)"; static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)" ", C,\002,i3,\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; complex q__1, q__2; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lce_(complex *, complex *, integer *); complex als; integer ict, icu; real err; integer jjab; complex beta; integer ldas, ldbs, ldcs; logical same, conj; complex bets; logical tran, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); real rbeta; logical isame[13]; integer nargs; real rbets; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int cher2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___334 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___347 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___348 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___349 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___350 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___351 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___352 = { 0, 0, 0, fmt_9993, 0 }; /* Tests CHER2K and CSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 12; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ if (tran) { i__3 = *nmax << 1; cmake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } else { cmake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } /* Generate the matrix B. */ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; cmake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { cmake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; if (conj) { rbeta = beta.r; q__1.r = rbeta, q__1.i = 0.f; beta.r = q__1.r, beta.i = q__1.i; } null = n <= 0; if (conj) { null = null || (k <= 0 || alpha.r == 0.f && alpha.i == 0.f) && rbeta == 1.f; } /* Generate the matrix C. */ cmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (conj) { if (*trace) { io___334.ciunit = *ntra; s_wsfe(&io___334); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cher2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &rbeta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___335.ciunit = *ntra; s_wsfe(&io___335); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } csyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___336.ciunit = *nout; s_wsfe(&io___336); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lce_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lce_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; if (conj) { isame[9] = rbets == rbeta; } else { isame[9] = bets.r == beta.r && bets.i == beta.i; } if (null) { isame[10] = lce_(&cs[1], &cc[1], &lcc); } else { isame[10] = lceres_("HE", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. */ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = (j - 1 << 1) * *nmax + k + i__; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8].i, q__1.i = alpha.r * ab[ i__8].i + alpha.i * ab[ i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; if (conj) { i__7 = k + i__; r_cnjg(&q__2, &alpha); i__8 = (j - 1 << 1) * *nmax + i__; q__1.r = q__2.r * ab[i__8].r - q__2.i * ab[i__8].i, q__1.i = q__2.r * ab[i__8].i + q__2.i * ab[ i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } else { i__7 = k + i__; i__8 = (j - 1 << 1) * *nmax + i__; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; cmmch_(transt, "N", &lj, &c__1, &i__6, &c_b2, &ab[jjab], &i__7, &w[ 1], &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1] , &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { if (conj) { i__7 = i__; r_cnjg(&q__2, &ab[(k + i__ - 1) * *nmax + j]); q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, q__1.i = alpha.r * q__2.i + alpha.i * q__2.r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; q__2.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__2.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; r_cnjg(&q__1, &q__2); w[i__7].r = q__1.r, w[i__7].i = q__1.i; } else { i__7 = i__; i__8 = (k + i__ - 1) * *nmax + j; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; cmmch_("N", "N", &lj, &c__1, &i__6, & c_b2, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ if (errmax < *thresh) { io___347.ciunit = *nout; s_wsfe(&io___347); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___348.ciunit = *nout; s_wsfe(&io___348); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L160; L140: if (n > 1) { io___349.ciunit = *nout; s_wsfe(&io___349); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: io___350.ciunit = *nout; s_wsfe(&io___350); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___351.ciunit = *nout; s_wsfe(&io___351); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___352.ciunit = *nout; s_wsfe(&io___352); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L160: return 0; /* End of CCHK5. */ } /* cchk5_ */ /* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ complex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), chemm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), cherk_(char *, char *, integer *, integer *, real *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen); real rbeta; extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), csymm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), ctrsm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), csyrk_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), cher2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___360 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___361 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) */ /* 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM */ /* with INFOT = 9 (eca) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA, BETA, RALPHA, and RBETA. */ alpha.r = 1.f, alpha.i = -1.f; beta.r = 2.f, beta.i = -2.f; ralpha = 1.f; rbeta = 2.f; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; } L10: infoc_1.infot = 1; cgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; cgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; cgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L20: infoc_1.infot = 1; chemm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chemm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L30: infoc_1.infot = 1; csymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; csymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L40: infoc_1.infot = 1; ctrmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L50: infoc_1.infot = 1; ctrsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L60: infoc_1.infot = 1; cherk_("/", "N", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cherk_("U", "T", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("U", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("U", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("L", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("L", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("U", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("U", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("L", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("L", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("U", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("L", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("U", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("L", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L70: infoc_1.infot = 1; csyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; csyrk_("U", "C", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L80: infoc_1.infot = 1; cher2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cher2k_("U", "T", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("U", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("L", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("U", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("L", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("U", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("L", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L90: infoc_1.infot = 1; csyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; csyr2k_("U", "C", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L100: if (infoc_1.ok) { io___360.ciunit = *nout; s_wsfe(&io___360); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___361.ciunit = *nout; s_wsfe(&io___361); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of CCHKE. */ } /* cchke_ */ /* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, complex *a, integer *nmax, complex *aa, integer *lda, logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; real r__1; complex q__1, q__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, jj; logical gen, her, tri, sym; extern /* Complex */ void cbeg_(complex *, logical *); integer ibeg, iend; logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'HE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; her = s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (her || sym || tri) && *(unsigned char *)uplo == 'U'; lower = (her || sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { i__3 = i__ + j * a_dim1; cbeg_(&q__2, reset); q__1.r = q__2.r + transl->r, q__1.i = q__2.i + transl->i; a[i__3].r = q__1.r, a[i__3].i = q__1.i; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { i__3 = i__ + j * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } if (her) { i__3 = j + i__ * a_dim1; r_cnjg(&q__1, &a[i__ + j * a_dim1]); a[i__3].r = q__1.r, a[i__3].i = q__1.i; } else if (sym) { i__3 = j + i__ * a_dim1; i__4 = i__ + j * a_dim1; a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } } } /* L10: */ } if (her) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; r__1 = a[i__3].r; q__1.r = r__1, q__1.i = 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; q__1.r = a[i__3].r + 1.f, q__1.i = a[i__3].i + 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1.f, a[i__2].i = 0.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen) 2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L80: */ } if (her) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; r__1 = aa[i__3].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__2].r = q__1.r, aa[i__2].i = q__1.i; } /* L90: */ } } return 0; /* End of CMAKE. */ } /* cmake_ */ /* Subroutine */ int cmmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc, complex *ct, real *g, complex *cc, integer *ldcc, real *eps, real *err, logical * fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; real r__1, r__2, r__3, r__4, r__5, r__6; complex q__1, q__2, q__3, q__4; /* Builtin functions */ double r_imag(complex *); void r_cnjg(complex *, complex *); double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; real erri; logical trana, tranb, ctrana, ctranb; /* Fortran I/O blocks */ static cilist io___382 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___383 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___384 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. Statement Function definitions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; ctrana = *(unsigned char *)transa == 'C'; ctranb = *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; ct[i__3].r = 0.f, ct[i__3].i = 0.f; g[i__] = 0.f; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = k + j * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, q__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[ i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag( &a[i__ + k * a_dim1]), abs(r__2))) * ((r__3 = b[ i__5].r, abs(r__3)) + (r__4 = r_imag(&b[k + j * b_dim1]), abs(r__4))); /* L20: */ } /* L30: */ } } else if (trana && ! tranb) { if (ctrana) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); i__6 = k + j * b_dim1; q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, q__2.i = q__3.r * b[i__6].i + q__3.i * b[i__6] .r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[k + j * b_dim1]), abs(r__4))); /* L40: */ } /* L50: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = k + j * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, q__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[k + j * b_dim1]), abs(r__4))); /* L60: */ } /* L70: */ } } } else if (! trana && tranb) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; r_cnjg(&q__3, &b[j + k * b_dim1]); q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, q__2.i = a[i__6].r * q__3.i + a[i__6].i * q__3.r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[j + k * b_dim1]), abs(r__4))); /* L80: */ } /* L90: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = j + k * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, q__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[j + k * b_dim1]), abs(r__4))); /* L100: */ } /* L110: */ } } } else if (trana && tranb) { if (ctrana) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); r_cnjg(&q__4, &b[j + k * b_dim1]); q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i + q__3.i * q__4.r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L120: */ } /* L130: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); i__6 = j + k * b_dim1; q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, q__2.i = q__3.r * b[i__6].i + q__3.i * b[ i__6].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L140: */ } /* L150: */ } } } else { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; r_cnjg(&q__3, &b[j + k * b_dim1]); q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, q__2.i = a[i__6].r * q__3.i + a[i__6].i * q__3.r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L160: */ } /* L170: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = j + k * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[ i__7].i, q__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L180: */ } /* L190: */ } } } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; q__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, q__2.i = alpha->r * ct[i__4].i + alpha->i * ct[i__4].r; i__5 = i__ + j * c_dim1; q__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, q__3.i = beta->r * c__[i__5].i + beta->i * c__[i__5].r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; ct[i__3].r = q__1.r, ct[i__3].i = q__1.i; i__3 = i__ + j * c_dim1; g[i__] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), abs(r__2))) * g[i__] + ((r__3 = beta->r, abs(r__3)) + ( r__4 = r_imag(beta), abs(r__4))) * ((r__5 = c__[i__3].r, abs(r__5)) + (r__6 = r_imag(&c__[i__ + j * c_dim1]), abs( r__6))); /* L200: */ } /* Compute the error ratio for this result. */ *err = 0.f; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__ + j * cc_dim1; q__2.r = ct[i__3].r - cc[i__4].r, q__2.i = ct[i__3].i - cc[i__4] .i; q__1.r = q__2.r, q__1.i = q__2.i; erri = ((r__1 = q__1.r, abs(r__1)) + (r__2 = r_imag(&q__1), abs( r__2))) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L230; } /* L210: */ } /* L220: */ } /* If the loop completes, all results are at least half accurate. */ goto L250; /* Report fatal error. */ L230: *fatal = TRUE_; io___382.ciunit = *nout; s_wsfe(&io___382); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___383.ciunit = *nout; s_wsfe(&io___383); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(real)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); e_wsfe(); } else { io___384.ciunit = *nout; s_wsfe(&io___384); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L240: */ } if (*n > 1) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L250: return 0; /* End of CMMCH. */ } /* cmmch_ */ logical lce_(complex *ri, complex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LCE. */ } /* lce_ */ logical lceres_(char *type__, char *uplo, integer *m, integer *n, complex *aa, complex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'HE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LCERES. */ } /* lceres_ */ /* Complex */ void cbeg_(complex * ret_val, logical *reset) { /* System generated locals */ real r__1, r__2; complex q__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } r__1 = (i__ - 500) / 1001.f; r__2 = (j - 500) / 1001.f; q__1.r = r__1, q__1.i = r__2; ret_val->r = q__1.r, ret_val->i = q__1.i; return ; /* End of CBEG. */ } /* cbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. */ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___397 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___397.ciunit = *nout; s_wsfe(&io___397); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___398 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___399 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___398.ciunit = infoc_2.nout; s_wsfe(&io___398); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___399.ciunit = infoc_2.nout; s_wsfe(&io___399); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___400.ciunit = infoc_2.nout; s_wsfe(&io___400); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int cblat3_ () { main (); return 0; } blis-1.1/blastest/src/dblat1.c000066400000000000000000001174731474157777200162710ustar00rootroot00000000000000/* dblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static doublereal c_b35 = 1.; static real c_b39 = .03125f; static integer c__5 = 5; static doublereal c_b63 = 0.; static real c_b81 = 0.f; /* > \brief \b DBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM DBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the DOUBLE PRECISION Level 1 BLAS. */ /* > */ /* > Based upon the original BLAS test routine together with: */ /* > F06EAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup double_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "dblat1"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static doublereal sfac = 9.765625e-4; /* Format strings */ static char fmt_99999[] = "(\002 Real BLAS Test Program Results\002,/1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check0_(doublereal *), check1_(doublereal *), check2_(doublereal *), check3_(doublereal *), header_(void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 13; ++ic) { combla_1.icase = ic; header_(); /* .. Initialize PASS, INCX, and INCY for a new case. .. */ /* .. the value 9999 for INCX or INCY will appear in the .. */ /* .. detailed output, if any, for cases that do not involve .. */ /* .. these parameters .. */ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; if (combla_1.icase == 3 || combla_1.icase == 11) { check0_(&sfac); } else if (combla_1.icase == 7 || combla_1.icase == 8 || combla_1.icase == 9 || combla_1.icase == 10) { check1_(&sfac); } else if (combla_1.icase == 1 || combla_1.icase == 2 || combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase == 12 || combla_1.icase == 13) { check2_(&sfac); } else if (combla_1.icase == 4) { check3_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*13] = " DDOT " "DAXPY " "DROTG " " DROT " "DCOPY " "DSWA" "P " "DNRM2 " "DASUM " "DSCAL " "IDAMAX" "DROTMG" "DROTM " "DSDOT " ; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check0_(doublereal *sfac) { /* Initialized data */ static doublereal ds1[8] = { .8,.6,.8,-.6,.8,0.,1.,0. }; static doublereal datrue[8] = { .5,.5,.5,-.5,-.5,0.,1.,1. }; static doublereal dbtrue[8] = { 0.,.6,0.,-.6,0.,0.,1.,0. }; static doublereal dab[36] /* was [4][9] */ = { .1,.3,1.2,.2,.7,.2,.6, 4.2,0.,0.,0.,0.,4.,-1.,2.,4.,6e-10,.02,1e5,10.,4e10,.02,1e-5,10., 2e-10,.04,1e5,10.,2e10,.04,1e-5,10.,4.,-2.,8.,4. }; static doublereal dtrue[81] /* was [9][9] */ = { 0.,0.,1.3,.2,0.,0.,0.,.5, 0.,0.,0.,4.5,4.2,1.,.5,0.,0.,0.,0.,0.,0.,0.,-2.,0.,0.,0.,0.,0.,0., 0.,4.,-1.,0.,0.,0.,0.,0.,.015,0.,10.,-1.,0.,-1e-4,0.,1.,0.,0., .06144,10.,-1.,4096.,-1e6,0.,1.,0.,0.,15.,10.,-1.,5e-5,0.,1.,0., 0.,0.,15.,10.,-1.,5e5,-4096.,1.,.004096,0.,0.,7.,4.,0.,0.,-.5, -.25,0. }; static doublereal d12 = 4096.; static doublereal da1[8] = { .3,.4,-.3,-.4,-.3,0.,0.,1. }; static doublereal db1[8] = { .4,.3,.4,.3,-.4,0.,1.,0. }; static doublereal dc1[8] = { .6,.8,-.6,.8,.6,1.,0.,1. }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k; doublereal sa, sb, sc, ss, dtemp[9]; extern /* Subroutine */ int drotg_(doublereal *, doublereal *, doublereal *, doublereal *), stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *), drotmg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); /* Fortran I/O blocks */ static cilist io___23 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* INPUT FOR MODIFIED GIVENS */ /* TRUE RESULTS FOR MODIFIED GIVENS */ /* 4096 = 2 ** 12 */ dtrue[0] = .092307692307692313; dtrue[1] = .27692307692307694; dtrue[6] = -.16666666666666666; dtrue[9] = .18666666666666668; dtrue[10] = .65333333333333332; dtrue[17] = .14285714285714285; dtrue[36] = d12 * d12 * 4.5e-10; dtrue[38] = 4e5 / (d12 * 3.); dtrue[41] = 1. / d12; dtrue[43] = 1e4 / (d12 * 3.); dtrue[45] = 4e10 / (d12 * 1.5 * d12); dtrue[46] = .013333333333333334; dtrue[52] = d12 * 5e-7; dtrue[54] = .026666666666666668; dtrue[55] = d12 * d12 * 1.3333333333333334e-10; dtrue[60] = -dtrue[41]; dtrue[62] = 1e4 / d12; dtrue[63] = dtrue[54]; dtrue[64] = 2e10 / (d12 * 1.5 * d12); dtrue[72] = 4.5714285714285712; dtrue[73] = -2.2857142857142856; /* .. Executable Statements .. */ /* Compute true values which cannot be prestored */ /* in decimal notation */ dbtrue[0] = 1.6666666666666667; dbtrue[2] = -1.6666666666666667; dbtrue[4] = 1.6666666666666667; for (k = 1; k <= 8; ++k) { /* .. Set N=K for identification in output if any .. */ combla_1.n = k; if (combla_1.icase == 3) { /* .. DROTG .. */ if (k > 8) { goto L40; } sa = da1[k - 1]; sb = db1[k - 1]; drotg_(&sa, &sb, &sc, &ss); stest1_(&sa, &datrue[k - 1], &datrue[k - 1], sfac); stest1_(&sb, &dbtrue[k - 1], &dbtrue[k - 1], sfac); stest1_(&sc, &dc1[k - 1], &dc1[k - 1], sfac); stest1_(&ss, &ds1[k - 1], &ds1[k - 1], sfac); } else if (combla_1.icase == 11) { /* .. DROTMG .. */ for (i__ = 1; i__ <= 4; ++i__) { dtemp[i__ - 1] = dab[i__ + (k << 2) - 5]; dtemp[i__ + 3] = 0.f; } dtemp[8] = 0.f; drotmg_(dtemp, &dtemp[1], &dtemp[2], &dtemp[3], &dtemp[4]); stest_(&c__9, dtemp, &dtrue[k * 9 - 9], &dtrue[k * 9 - 9], sfac); } else { s_wsle(&io___23); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK0", (ftnlen)28); e_wsle(); s_stop("", (ftnlen)0); } /* L20: */ } L40: return 0; } /* check0_ */ /* Subroutine */ int check1_(doublereal *sfac) { /* Initialized data */ static doublereal sa[10] = { .3,-1.,0.,1.,.3,.3,.3,.3,.3,.3 }; static doublereal dv[80] /* was [8][5][2] */ = { .1,2.,2.,2.,2.,2.,2., 2.,.3,3.,3.,3.,3.,3.,3.,3.,.3,-.4,4.,4.,4.,4.,4.,4.,.2,-.6,.3,5., 5.,5.,5.,5.,.1,-.3,.5,-.1,6.,6.,6.,6.,.1,8.,8.,8.,8.,8.,8.,8.,.3, 9.,9.,9.,9.,9.,9.,9.,.3,2.,-.4,2.,2.,2.,2.,2.,.2,3.,-.6,5.,.3,2., 2.,2.,.1,4.,-.3,6.,-.5,7.,-.1,3. }; static doublereal dtrue1[5] = { 0.,.3,.5,.7,.6 }; static doublereal dtrue3[5] = { 0.,.3,.7,1.1,1. }; static doublereal dtrue5[80] /* was [8][5][2] */ = { .1,2.,2.,2., 2.,2.,2.,2.,-.3,3.,3.,3.,3.,3.,3.,3.,0.,0.,4.,4.,4.,4.,4.,4.,.2, -.6,.3,5.,5.,5.,5.,5.,.03,-.09,.15,-.03,6.,6.,6.,6.,.1,8.,8.,8., 8.,8.,8.,8.,.09,9.,9.,9.,9.,9.,9.,9.,.09,2.,-.12,2.,2.,2.,2.,2., .06,3.,-.18,5.,.09,2.,2.,2.,.03,4.,-.09,6.,-.15,7.,-.03,3. }; static integer itrue2[5] = { 0,1,2,2,3 }; /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; doublereal sx[8]; integer np1, len; extern doublereal dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); doublereal stemp[1], strue[8]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *), itest1_(integer *, integer *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *); extern integer idamax_(integer *, doublereal *, integer *); /* Fortran I/O blocks */ static cilist io___36 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { sx[i__ - 1] = dv[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L20: */ } if (combla_1.icase == 7) { /* .. DNRM2 .. */ stemp[0] = dtrue1[np1 - 1]; d__1 = dnrm2_(&combla_1.n, sx, &combla_1.incx); stest1_(&d__1, stemp, stemp, sfac); } else if (combla_1.icase == 8) { /* .. DASUM .. */ stemp[0] = dtrue3[np1 - 1]; d__1 = dasum_(&combla_1.n, sx, &combla_1.incx); stest1_(&d__1, stemp, stemp, sfac); } else if (combla_1.icase == 9) { /* .. DSCAL .. */ dscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], sx, &combla_1.incx); i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L40: */ } stest_(&len, sx, strue, strue, sfac); } else if (combla_1.icase == 10) { /* .. IDAMAX .. */ i__1 = idamax_(&combla_1.n, sx, &combla_1.incx); itest1_(&i__1, &itrue2[np1 - 1]); } else { s_wsle(&io___36); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L60: */ } /* L80: */ } return 0; } /* check1_ */ /* Subroutine */ int check2_(doublereal *sfac) { /* Initialized data */ static doublereal sa = .3; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static doublereal dx1[7] = { .6,.1,-.5,.8,.9,-.3,-.4 }; static doublereal dy1[7] = { .5,-.9,.3,.7,-.6,.2,.8 }; static real sx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real sy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static doublereal dt7[16] /* was [4][4] */ = { 0.,.3,.21,.62,0.,.3,-.07, .85,0.,.3,-.79,-.74,0.,.3,.33,1.27 }; static doublereal dt8[112] /* was [7][4][4] */ = { .5,0.,0.,0.,0.,0.,0., .68,0.,0.,0.,0.,0.,0.,.68,-.87,0.,0.,0.,0.,0.,.68,-.87,.15,.94,0., 0.,0.,.5,0.,0.,0.,0.,0.,0.,.68,0.,0.,0.,0.,0.,0.,.35,-.9,.48,0., 0.,0.,0.,.38,-.9,.57,.7,-.75,.2,.98,.5,0.,0.,0.,0.,0.,0.,.68,0., 0.,0.,0.,0.,0.,.35,-.72,0.,0.,0.,0.,0.,.38,-.63,.15,.88,0.,0.,0., .5,0.,0.,0.,0.,0.,0.,.68,0.,0.,0.,0.,0.,0.,.68,-.9,.33,0.,0.,0., 0.,.68,-.9,.33,.7,-.75,.2,1.04 }; static doublereal dt10x[112] /* was [7][4][4] */ = { .6,0.,0.,0., 0.,0.,0.,.5,0.,0.,0.,0.,0.,0.,.5,-.9,0.,0.,0.,0.,0.,.5,-.9,.3,.7, 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.5,0.,0.,0.,0.,0.,0.,.3,.1,.5,0.,0., 0.,0.,.8,.1,-.6,.8,.3,-.3,.5,.6,0.,0.,0.,0.,0.,0.,.5,0.,0.,0.,0., 0.,0.,-.9,.1,.5,0.,0.,0.,0.,.7,.1,.3,.8,-.9,-.3,.5,.6,0.,0.,0.,0., 0.,0.,.5,0.,0.,0.,0.,0.,0.,.5,.3,0.,0.,0.,0.,0.,.5,.3,-.6,.8,0., 0.,0. }; static doublereal dt10y[112] /* was [7][4][4] */ = { .5,0.,0.,0., 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.6,.1,0.,0.,0.,0.,0.,.6,.1,-.5,.8, 0.,0.,0.,.5,0.,0.,0.,0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,-.5,-.9,.6,0., 0.,0.,0.,-.4,-.9,.9,.7,-.5,.2,.6,.5,0.,0.,0.,0.,0.,0.,.6,0.,0.,0., 0.,0.,0.,-.5,.6,0.,0.,0.,0.,0.,-.4,.9,-.5,.6,0.,0.,0.,.5,0.,0.,0., 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.6,-.9,.1,0.,0.,0.,0.,.6,-.9,.1,.7, -.5,.2,.8 }; static doublereal ssize1[4] = { 0.,.3,1.6,3.2 }; static doublereal ssize2[28] /* was [14][2] */ = { 0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0.,0.,0.,0.,1.17,1.17,1.17,1.17,1.17,1.17,1.17, 1.17,1.17,1.17,1.17,1.17,1.17,1.17 }; static doublereal dpar[20] /* was [5][4] */ = { -2.,0.,0.,0.,0.,-1.,2., -3.,-4.,5.,0.,0.,2.,-3.,0.,1.,5.,2.,0.,-4. }; static struct { doublereal e_1[448]; } equiv_3 = {{ .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, 0., 0., 0., 0., 0., -.8, 3.8, 0., 0., 0., 0., 0., -.9, 2.8, 0., 0., 0., 0., 0., 3.5, -.4, 0., 0., 0., 0., 0., .6, .1, -.5, .8, 0., 0., 0., -.8, 3.8, -2.2, -1.2, 0., 0., 0., -.9, 2.8, -1.4, -1.3, 0., 0., 0., 3.5, -.4, -2.2, 4.7, 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., 0., 0., .1, -3., 0., 0., 0., 0., -.3, .1, -2., 0., 0., 0., 0., 3.3, .1, -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, -2., .1, 1.4, .8, .6, -.3, -2.8, -1.8, .1, 1.3, .8, 0., -.3, -1.9, 3.8, .1, -3.1, .8, 4.8, -.3, -1.5, .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., 0., 4.8, .1, -3., 0., 0., 0., 0., 3.3, .1, -2., 0., 0., 0., 0., 2.1, .1, -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, -1.6, .1, -2.2, .8, 5.4, -.3, -2.8, -1.5, .1, -1.4, .8, 3.6, -.3, -1.9, 3.7, .1, -2.2, .8, 3.6, -.3, -1.5, .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, 0., 0., 0., 0., 0., -.8, -1., 0., 0., 0., 0., 0., -.9, -.8, 0., 0., 0., 0., 0., 3.5, .8, 0., 0., 0., 0., 0., .6, .1, -.5, .8, 0., 0., 0., -.8, -1., 1.4, -1.6, 0., 0., 0., -.9, -.8, 1.3, -1.6, 0., 0., 0., 3.5, .8, -3.1, 4.8, 0., 0., 0. }}; static struct { doublereal e_1[448]; } equiv_7 = {{ .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, 0., 0., 0., 0., 0., .7, -4.8, 0., 0., 0., 0., 0., 1.7, -.7, 0., 0., 0., 0., 0., -2.6, 3.5, 0., 0., 0., 0., 0., .5, -.9, .3, .7, 0., 0., 0., .7, -4.8, 3., 1.1, 0., 0., 0., 1.7, -.7, -.7, 2.3, 0., 0., 0., -2.6, 3.5, -.7, -3.6, 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., 4., -.9, -.3, 0., 0., 0., 0., -.5, -.9, 1.5, 0., 0., 0., 0., -1.5, -.9, -1.8, 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, 3.7, -.9, -1.2, .7, -1.5, .2, 2.2, -.3, -.9, 2.1, .7, -1.6, .2, 2., -1.6, -.9, -2.1, .7, 2.9, .2, -3.8, .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, 0., 0., 0., 0., 0., 4., -6.3, 0., 0., 0., 0., 0., -.5, .3, 0., 0., 0., 0., 0., -1.5, 3., 0., 0., 0., 0., 0., .5, -.9, .3, .7, 0., 0., 0., 3.7, -7.2, 3., 1.7, 0., 0., 0., -.3, .9, -.7, 1.9, 0., 0., 0., -1.6, 2.7, -.7, -3.4, 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., .7, -.9, 1.2, 0., 0., 0., 0., 1.7, -.9, .5, 0., 0., 0., 0., -2.6, -.9, -1.3, 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, .7, -.9, 1.2, .7, -1.5, .2, 1.6, 1.7, -.9, .5, .7, -1.6, .2, 2.4, -2.6, -.9, -1.3, .7, 2.9, .2, -4. }}; /* System generated locals */ integer i__1; real r__1, r__2, r__3; doublereal d__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, j; extern /* Subroutine */ int testdsdot_(real *, real *, real *, real *); integer ki, kn, mx, my; doublereal sx[7], sy[7]; integer kni; doublereal stx[7], sty[7]; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer kpar, lenx, leny; #define dt19x ((doublereal *)&equiv_3) #define dt19y ((doublereal *)&equiv_7) doublereal dtemp[5]; #define dt19xa ((doublereal *)&equiv_3) #define dt19xb ((doublereal *)&equiv_3 + 112) #define dt19xc ((doublereal *)&equiv_3 + 224) #define dt19xd ((doublereal *)&equiv_3 + 336) #define dt19ya ((doublereal *)&equiv_7) #define dt19yb ((doublereal *)&equiv_7 + 112) #define dt19yc ((doublereal *)&equiv_7 + 224) #define dt19yd ((doublereal *)&equiv_7 + 336) extern doublereal dsdot_(integer *, real *, integer *, real *, integer *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer ksize; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), drotm_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *), dswap_( integer *, doublereal *, integer *, doublereal *, integer *); doublereal ssize[7]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *); /* Fortran I/O blocks */ static cilist io___80 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* **** FGVZ: We have to add separate REAL arrays for DSDOT() because */ /* **** REAL() on an array argument does not translate via f2c. */ /* FOR DROTM */ /* TRUE X RESULTS F0R ROTATIONS DROTM */ /* TRUE Y RESULTS FOR ROTATIONS DROTM */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. Initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; /* **** FGVZ: We have to add a loop to initialize separate REAL arrays */ /* **** for DSDOT() because REAL() on an array argument does not */ /* **** translate via f2c. */ sx1[i__ - 1] = dx1[i__ - 1]; sy1[i__ - 1] = dy1[i__ - 1]; /* L20: */ } if (combla_1.icase == 1) { /* .. DDOT .. */ d__1 = ddot_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy); stest1_(&d__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. DAXPY .. */ daxpy_(&combla_1.n, &sa, sx, &combla_1.incx, sy, & combla_1.incy); i__1 = leny; for (j = 1; j <= i__1; ++j) { sty[j - 1] = dt8[j + (kn + (ki << 2)) * 7 - 36]; /* L40: */ } stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else if (combla_1.icase == 5) { /* .. DCOPY .. */ for (i__ = 1; i__ <= 7; ++i__) { sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L60: */ } dcopy_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 6) { /* .. DSWAP .. */ dswap_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); for (i__ = 1; i__ <= 7; ++i__) { stx[i__ - 1] = dt10x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L80: */ } stest_(&lenx, sx, stx, ssize2, &c_b35); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 12) { /* .. DROTM .. */ kni = kn + (ki - 1 << 2); for (kpar = 1; kpar <= 4; ++kpar) { for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - 36]; sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - 36]; } for (i__ = 1; i__ <= 5; ++i__) { dtemp[i__ - 1] = dpar[i__ + kpar * 5 - 6]; } i__1 = lenx; for (i__ = 1; i__ <= i__1; ++i__) { ssize[i__ - 1] = stx[i__ - 1]; } /* SEE REMARK ABOVE ABOUT DT11X(1,2,7) */ /* AND DT11X(5,3,8). */ if (kpar == 2 && kni == 7) { ssize[0] = 2.4; } if (kpar == 3 && kni == 8) { ssize[4] = 1.8; } drotm_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy, dtemp); stest_(&lenx, sx, stx, ssize, sfac); stest_(&leny, sy, sty, sty, sfac); } } else if (combla_1.icase == 13) { /* .. DSDOT .. */ /* **** CALL TESTDSDOT(REAL(DSDOT(N,REAL(SX),INCX,REAL(SY),INCY)), */ r__1 = (real) dsdot_(&combla_1.n, sx1, &combla_1.incx, sy1, & combla_1.incy); r__2 = (real) dt7[kn + (ki << 2) - 5]; r__3 = (real) ssize1[kn - 1]; testdsdot_(&r__1, &r__2, &r__3, &c_b39); } else { s_wsle(&io___80); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L100: */ } /* L120: */ } return 0; } /* check2_ */ #undef dt19yd #undef dt19yc #undef dt19yb #undef dt19ya #undef dt19xd #undef dt19xc #undef dt19xb #undef dt19xa #undef dt19y #undef dt19x /* Subroutine */ int check3_(doublereal *sfac) { /* Initialized data */ static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static doublereal dx1[7] = { .6,.1,-.5,.8,.9,-.3,-.4 }; static doublereal dy1[7] = { .5,-.9,.3,.7,-.6,.2,.8 }; static doublereal sc = .8; static doublereal ss = .6; static doublereal dt9x[112] /* was [7][4][4] */ = { .6,0.,0.,0.,0.,0.,0., .78,0.,0.,0.,0.,0.,0.,.78,-.46,0.,0.,0.,0.,0.,.78,-.46,-.22,1.06, 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.78,0.,0.,0.,0.,0.,0.,.66,.1,-.1,0., 0.,0.,0.,.96,.1,-.76,.8,.9,-.3,-.02,.6,0.,0.,0.,0.,0.,0.,.78,0., 0.,0.,0.,0.,0.,-.06,.1,-.1,0.,0.,0.,0.,.9,.1,-.22,.8,.18,-.3,-.02, .6,0.,0.,0.,0.,0.,0.,.78,0.,0.,0.,0.,0.,0.,.78,.26,0.,0.,0.,0.,0., .78,.26,-.76,1.12,0.,0.,0. }; static doublereal dt9y[112] /* was [7][4][4] */ = { .5,0.,0.,0.,0.,0.,0., .04,0.,0.,0.,0.,0.,0.,.04,-.78,0.,0.,0.,0.,0.,.04,-.78,.54,.08,0., 0.,0.,.5,0.,0.,0.,0.,0.,0.,.04,0.,0.,0.,0.,0.,0.,.7,-.9,-.12,0., 0.,0.,0.,.64,-.9,-.3,.7,-.18,.2,.28,.5,0.,0.,0.,0.,0.,0.,.04,0., 0.,0.,0.,0.,0.,.7,-1.08,0.,0.,0.,0.,0.,.64,-1.26,.54,.2,0.,0.,0., .5,0.,0.,0.,0.,0.,0.,.04,0.,0.,0.,0.,0.,0.,.04,-.9,.18,0.,0.,0., 0.,.04,-.9,.18,.7,-.18,.2,.16 }; static doublereal ssize2[28] /* was [14][2] */ = { 0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0.,0.,0.,0.,1.17,1.17,1.17,1.17,1.17,1.17,1.17, 1.17,1.17,1.17,1.17,1.17,1.17,1.17 }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k, ki, kn, mx, my; doublereal sx[7], sy[7], stx[7], sty[7]; integer lenx, leny; doublereal mwpc[11]; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer mwpn[11]; doublereal mwps[11], mwpx[5], mwpy[5]; integer ksize; doublereal copyx[5], copyy[5]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; integer mwpinx[11], mwpiny[11]; doublereal mwpstx[5], mwpsty[5]; /* Fortran I/O blocks */ static cilist io___104 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; if (combla_1.icase == 4) { /* .. DROT .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt9x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt9y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L20: */ } drot_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy, & sc, &ss); stest_(&lenx, sx, stx, &ssize2[ksize * 14 - 14], sfac); stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else { s_wsle(&io___104); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK3", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } mwpc[0] = 1.; for (i__ = 2; i__ <= 11; ++i__) { mwpc[i__ - 1] = 0.; /* L80: */ } mwps[0] = 0.; for (i__ = 2; i__ <= 6; ++i__) { mwps[i__ - 1] = 1.; /* L100: */ } for (i__ = 7; i__ <= 11; ++i__) { mwps[i__ - 1] = -1.; /* L120: */ } mwpinx[0] = 1; mwpinx[1] = 1; mwpinx[2] = 1; mwpinx[3] = -1; mwpinx[4] = 1; mwpinx[5] = -1; mwpinx[6] = 1; mwpinx[7] = 1; mwpinx[8] = -1; mwpinx[9] = 1; mwpinx[10] = -1; mwpiny[0] = 1; mwpiny[1] = 1; mwpiny[2] = -1; mwpiny[3] = -1; mwpiny[4] = 2; mwpiny[5] = 1; mwpiny[6] = 1; mwpiny[7] = -1; mwpiny[8] = -1; mwpiny[9] = 2; mwpiny[10] = 1; for (i__ = 1; i__ <= 11; ++i__) { mwpn[i__ - 1] = 5; /* L140: */ } mwpn[4] = 3; mwpn[9] = 3; for (i__ = 1; i__ <= 5; ++i__) { mwpx[i__ - 1] = (doublereal) i__; mwpy[i__ - 1] = (doublereal) i__; mwptx[i__ * 11 - 11] = (doublereal) i__; mwpty[i__ * 11 - 11] = (doublereal) i__; mwptx[i__ * 11 - 10] = (doublereal) i__; mwpty[i__ * 11 - 10] = (doublereal) (-i__); mwptx[i__ * 11 - 9] = (doublereal) (6 - i__); mwpty[i__ * 11 - 9] = (doublereal) (i__ - 6); mwptx[i__ * 11 - 8] = (doublereal) i__; mwpty[i__ * 11 - 8] = (doublereal) (-i__); mwptx[i__ * 11 - 6] = (doublereal) (6 - i__); mwpty[i__ * 11 - 6] = (doublereal) (i__ - 6); mwptx[i__ * 11 - 5] = (doublereal) (-i__); mwpty[i__ * 11 - 5] = (doublereal) i__; mwptx[i__ * 11 - 4] = (doublereal) (i__ - 6); mwpty[i__ * 11 - 4] = (doublereal) (6 - i__); mwptx[i__ * 11 - 3] = (doublereal) (-i__); mwpty[i__ * 11 - 3] = (doublereal) i__; mwptx[i__ * 11 - 1] = (doublereal) (i__ - 6); mwpty[i__ * 11 - 1] = (doublereal) (6 - i__); /* L160: */ } mwptx[4] = 1.; mwptx[15] = 3.; mwptx[26] = 5.; mwptx[37] = 4.; mwptx[48] = 5.; mwpty[4] = -1.; mwpty[15] = 2.; mwpty[26] = -2.; mwpty[37] = 4.; mwpty[48] = -3.; mwptx[9] = -1.; mwptx[20] = -3.; mwptx[31] = -5.; mwptx[42] = 4.; mwptx[53] = 5.; mwpty[9] = 1.; mwpty[20] = 2.; mwpty[31] = 2.; mwpty[42] = 4.; mwpty[53] = 3.; for (i__ = 1; i__ <= 11; ++i__) { combla_1.incx = mwpinx[i__ - 1]; combla_1.incy = mwpiny[i__ - 1]; for (k = 1; k <= 5; ++k) { copyx[k - 1] = mwpx[k - 1]; copyy[k - 1] = mwpy[k - 1]; mwpstx[k - 1] = mwptx[i__ + k * 11 - 12]; mwpsty[k - 1] = mwpty[i__ + k * 11 - 12]; /* L180: */ } drot_(&mwpn[i__ - 1], copyx, &combla_1.incx, copyy, &combla_1.incy, & mwpc[i__ - 1], &mwps[i__ - 1]); stest_(&c__5, copyx, mwpstx, mwpstx, sfac); stest_(&c__5, copyy, mwpsty, mwpsty, sfac); /* L200: */ } return 0; } /* check3_ */ /* Subroutine */ int stest_(integer *len, doublereal *scomp, doublereal * strue, doublereal *ssize, doublereal *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY I " " \002,\002 COMP(I) TRUE(I) " " DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,2i5,i3,2d36.8,2d12.4)"; /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; doublereal sd; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___121 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___122 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___123 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((d__2 = *sfac * sd, abs(d__2)) <= (d__1 = ssize[i__], abs(d__1)) * d_epsilon_(&c_b63)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___121); e_wsfe(); s_wsfe(&io___122); e_wsfe(); L20: s_wsfe(&io___123); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int testdsdot_(real *scomp, real *strue, real *ssize, real * sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY " " \002,\002 COMP(I) TRUE(I) DIF" "FERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,1i5,i3,2e36.8,2e12.4)"; /* System generated locals */ real r__1; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ real sd; extern real s_epsilon_(); /* Fortran I/O blocks */ static cilist io___125 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___126 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___127 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ sd = *scomp - *strue; if ((r__1 = *sfac * sd, abs(r__1)) <= abs(*ssize) * s_epsilon_(&c_b81)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___125); e_wsfe(); s_wsfe(&io___126); e_wsfe(); L20: s_wsfe(&io___127); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*scomp), (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&(*strue), (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&(*ssize), (ftnlen)sizeof(real)); e_wsfe(); L40: return 0; } /* testdsdot_ */ /* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, doublereal *ssize, doublereal *sfac) { doublereal scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ doublereal sdiff_(doublereal *sa, doublereal *sb) { /* System generated locals */ doublereal ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY " " \002,\002 COMP TRUE " " DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,2i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___130 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___131 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___133 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___130); e_wsfe(); s_wsfe(&io___131); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___133); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int dblat1_ () { main (); return 0; } blis-1.1/blastest/src/dblat2.c000066400000000000000000004540371474157777200162720ustar00rootroot00000000000000/* dblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static doublereal c_b120 = 0.; static doublereal c_b128 = 1.; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; /* > \brief \b DBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM DBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the DOUBLE PRECISION Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 16 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 34 lines: */ /* > 'dblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 0.9 VALUES OF BETAC */ /* > DGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTBSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DGER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup double_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "dblat2"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*16] = "DGEMV " "DGBMV " "DSYMV " "DSBMV " "DSPMV " "DTRMV " "DTBMV " "DTPMV " "DTRSV " "DTBSV " "DTPSV " "DGER " "DSYR " "DSPR " "DSYR2 " "DSPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE DOUBLE PRECISION LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9988[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9985[] = "(\002 ERROR IN DMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 DMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublereal a[4225] /* was [65][65] */, g[65]; integer i__, j, n; doublereal x[65], y[65], z__[130], aa[4225]; integer kb[7]; doublereal as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7]; extern logical lde_(doublereal *, doublereal *, integer *); integer inc[7], nkb; doublereal bet[7], eps, err; integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk5_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk6_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchke_(integer *, char *, integer *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; logical ltest[16], sfatal; char snamet[6]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 16; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 16; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b120); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of DMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; a[i__ + j * 65 - 66] = (doublereal) max(i__3,0); /* L110: */ } x[j - 1] = (doublereal) j; y[j - 1] = 0.; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { yy[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L130: */ } /* YY holds the exact result. On exit from DMVCH YT holds */ /* the result computed by DMVCH. */ *(unsigned char *)trans = 'N'; dmvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c__1, &c_b120, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lde_(yy, yt, &n); if (! same || err != 0.) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; dmvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c_n1, &c_b120, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lde_(yy, yt, &n); if (! same || err != 0.) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 16; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { dchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L180; case 14: goto L180; case 15: goto L190; case 16: goto L190; } /* Test DGEMV, 01, and DGBMV, 02. */ L140: dchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test DSYMV, 03, DSBMV, 04, and DSPMV, 05. */ L150: dchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test DTRMV, 06, DTBMV, 07, DTPMV, 08, */ /* DTRSV, 09, DTBSV, 10, and DTPSV, 11. */ L160: dchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test DGER, 12. */ L170: dchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test DSYR, 13, and DSPR, 14. */ L180: dchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test DSYR2, 15, and DSPR2, 16. */ L190: dchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of DBLAT2. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err; integer iku, kls, kus; doublereal beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dgemv_( char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; char trans[1]; logical banded; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DGEMV and DGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. */ transl = 0.; dmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. */ transl = .5; i__4 = abs(incx); i__5 = nl - 1; dmake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { x[nl / 2] = 0.; xx[abs(incx) * (nl / 2 - 1) + 1] = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { alpha = alf[ia]; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { beta = bet[ib]; /* Generate the vector Y. */ transl = 0.; i__7 = abs(incy); i__8 = ml - 1; dmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als = alpha; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; bls = beta; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als == alpha; isame[4] = lde_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lde_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls == beta; if (null) { isame[9] = lde_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lderes_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als == alpha; isame[6] = lde_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lde_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls == beta; if (null) { isame[11] = lde_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lderes_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. */ dmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L140; L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of DCHK1. */ } /* dchk1_ */ /* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f4.1," "\002, Y,\002,i2,\002) .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, AP\002,\002, X,\002,i2,\002,\002,f4.1" ",\002, Y,\002,i2,\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err, beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); logical reset; integer incxs, incys; extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); char uplos[1]; extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); logical banded, packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DSYMV, DSBMV and DSPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. */ transl = 0.; dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5; i__4 = abs(incx); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.; xx[abs(incx) * (n / 2 - 1) + 1] = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { alpha = alf[ia]; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { beta = bet[ib]; /* Generate the vector Y. */ transl = 0.; i__7 = abs(incy); i__8 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als = alpha; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; bls = beta; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsymv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsbmv_(uplo, &n, &k, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dspmv_(uplo, &n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als == alpha; isame[3] = lde_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lde_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls == beta; if (null) { isame[8] = lde_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lderes_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als == alpha; isame[4] = lde_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lde_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls == beta; if (null) { isame[9] = lde_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lderes_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als == alpha; isame[3] = lde_(&as[1], &aa[1], &laa); isame[4] = lde_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls == beta; if (null) { isame[7] = lde_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lderes_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ dmvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of DCHK2. */ } /* dchk2_ */ /* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *xt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda; extern logical lde_(doublereal *, doublereal *, integer *); integer ict, icu; doublereal err; char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); char diags[1]; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); logical reset; extern /* Subroutine */ int dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); integer incxs; char trans[1]; extern /* Subroutine */ int dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; extern /* Subroutine */ int dtrsv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DTRMV, DTBMV, DTPMV, DTRSV, DTBSV and DTPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero vector for DMVCH. */ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { z__[i__] = 0.; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl = 0.; dmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5; i__4 = abs(incx); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.; xx[abs(incx) * (n / 2 - 1) + 1] = 0.; } ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L30: */ } incxs = incx; /* Call the subroutine. */ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtrmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtrsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lde_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lde_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lderes_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lde_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lde_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lderes_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lde_(&as[1], &aa[1], &laa); if (null) { isame[5] = lde_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lderes_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. */ dmvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &x[1], &incx, & c_b120, &z__[1], &incx, &xt[1], & g[1], &xx[1], eps, &err, fatal, nout, &c_true, (ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. */ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = xx[(i__ - 1) * abs(incx) + 1]; xx[(i__ - 1) * abs(incx) + 1] = x[i__] ; /* L50: */ } dmvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &z__[1], &incx, & c_b120, &x[1], &incx, &xt[1], &g[ 1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of DCHK3. */ } /* dchk3_ */ /* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002,\002)" ",f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n; doublereal w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, err; extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); integer ldas; logical same; integer incx, incy; logical null; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; /* Fortran I/O blocks */ static cilist io___284 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___285 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___288 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___292 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9994, 0 }; /* Tests DGER. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Define the number of arguments. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. */ transl = .5; i__3 = abs(incx); i__4 = m - 1; dmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { x[m / 2] = 0.; xx[abs(incx) * (m / 2 - 1) + 1] = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl = 0.; i__4 = abs(incy); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.; yy[abs(incy) * (n / 2 - 1) + 1] = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; /* Generate the matrix A. */ transl = 0.; i__5 = m - 1; i__6 = n - 1; dmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ ms = m; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (*trace) { io___284.ciunit = *ntra; s_wsfe(&io___284); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dger_(&m, &n, &alpha, &xx[1], &incx, &yy[1], &incy, & aa[1], &lda); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___285.ciunit = *nout; s_wsfe(&io___285); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lde_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lde_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lde_(&as[1], &aa[1], &laa); } else { isame[7] = lderes_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[i__]; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[m - i__ + 1]; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { w[0] = y[j]; } else { w[0] = y[n - j + 1]; } dmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b128, &a[j * a_dim1 + 1], &c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___292.ciunit = *nout; s_wsfe(&io___292); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L150; L130: io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of DCHK4. */ } /* dchk4_ */ /* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, n; doublereal w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, err; integer ldas; logical same; integer incx; logical full; extern /* Subroutine */ int dspr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, ftnlen); logical null; char uplo[1]; extern /* Subroutine */ int dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen), dmake_( char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs; logical upper; char uplos[1]; logical packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; /* Fortran I/O blocks */ static cilist io___324 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___325 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___326 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___329 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___337 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9994, 0 }; /* Tests DSYR and DSPR. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5; i__3 = abs(incx); i__4 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.; xx[abs(incx) * (n / 2 - 1) + 1] = 0.; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; null = n <= 0 || alpha == 0.; /* Generate the matrix A. */ transl = 0.; i__4 = n - 1; i__5 = n - 1; dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als = alpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; /* Call the subroutine. */ if (full) { if (*trace) { io___324.ciunit = *ntra; s_wsfe(&io___324); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsyr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], &lda, ( ftnlen)1); } else if (packed) { if (*trace) { io___325.ciunit = *ntra; s_wsfe(&io___325); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dspr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___326.ciunit = *nout; s_wsfe(&io___326); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lde_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lde_(&as[1], &aa[1], &laa); } else { isame[5] = lderes_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___329.ciunit = *nout; s_wsfe(&io___329); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = x[i__]; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = x[n - i__ + 1]; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { w[0] = z__[j]; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } dmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b128, &a[jj + j * a_dim1], & c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___336.ciunit = *nout; s_wsfe(&io___336); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___337.ciunit = *nout; s_wsfe(&io___337); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L110: io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of DCHK5. */ } /* dchk5_ */ /* Subroutine */ int dchk6_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i" "3,\002) .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, n; doublereal w[2]; integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, err; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int dspr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, ftnlen), dsyr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen), dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; logical upper; char uplos[1]; logical packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; /* Fortran I/O blocks */ static cilist io___373 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___374 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___375 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___378 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___386 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9994, 0 }; /* Tests DSYR2 and DSPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5; i__3 = abs(incx); i__4 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.; xx[abs(incx) * (n / 2 - 1) + 1] = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl = 0.; i__4 = abs(incy); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.; yy[abs(incy) * (n / 2 - 1) + 1] = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; null = n <= 0 || alpha == 0.; /* Generate the matrix A. */ transl = 0.; i__5 = n - 1; i__6 = n - 1; dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___373.ciunit = *ntra; s_wsfe(&io___373); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsyr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___374.ciunit = *ntra; s_wsfe(&io___374); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dspr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___375.ciunit = *nout; s_wsfe(&io___375); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lde_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lde_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lde_(&as[1], &aa[1], &laa); } else { isame[7] = lderes_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___378.ciunit = *nout; s_wsfe(&io___378); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + z_dim1] = x[i__]; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + z_dim1] = x[n - i__ + 1]; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + (z_dim1 << 1)] = y[i__]; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + (z_dim1 << 1)] = y[n - i__ + 1]; /* L80: */ } } ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { w[0] = z__[j + (z_dim1 << 1)]; w[1] = z__[j + z_dim1]; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } dmvch_("N", &lj, &c__2, &alpha, &z__[jj + z_dim1], nmax, w, &c__1, &c_b128, &a[ jj + j * a_dim1], &c__1, &yt[1], &g[1] , &aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. */ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. */ if (errmax < *thresh) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___386.ciunit = *nout; s_wsfe(&io___386); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L170; L150: io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of DCHK6. */ } /* dchk6_ */ /* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublereal a[1] /* was [1][1] */, x[1], y[1], beta; extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dspr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, ftnlen), dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen), dspr2_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, ftnlen), dsyr2_( char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen); doublereal alpha; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dgemv_( char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dsbmv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dtrsv_( char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___396 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___397 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; } L10: infoc_1.infot = 1; dgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L20: infoc_1.infot = 1; dgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L30: infoc_1.infot = 1; dsymv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsymv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dsymv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsymv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L40: infoc_1.infot = 1; dsbmv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dsbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dsbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dsbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L50: infoc_1.infot = 1; dspmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dspmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dspmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dspmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L60: infoc_1.infot = 1; dtrmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrmv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dtrmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L70: infoc_1.infot = 1; dtbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L80: infoc_1.infot = 1; dtpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L90: infoc_1.infot = 1; dtrsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dtrsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L100: infoc_1.infot = 1; dtbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L110: infoc_1.infot = 1; dtpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L120: infoc_1.infot = 1; dger_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dger_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dger_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dger_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dger_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L130: infoc_1.infot = 1; dsyr_("/", &c__0, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyr_("U", &c_n1, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dsyr_("U", &c__0, &alpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr_("U", &c__2, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L140: infoc_1.infot = 1; dspr_("/", &c__0, &alpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dspr_("U", &c_n1, &alpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dspr_("U", &c__0, &alpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L150: infoc_1.infot = 1; dsyr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dsyr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L160: infoc_1.infot = 1; dspr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dspr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dspr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dspr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L170: if (infoc_1.ok) { io___396.ciunit = *nout; s_wsfe(&io___396); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___397.ciunit = *nout; s_wsfe(&io___397); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of DCHKE. */ } /* dchke_ */ /* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *nmax, doublereal *aa, integer * lda, integer *kl, integer *ku, logical *reset, doublereal *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, kk; logical gen, tri, sym; extern doublereal dbeg_(logical *); integer ibeg, iend, ioff; logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'S'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { a[i__ + j * a_dim1] = dbeg_(reset) + *transl; } else { a[i__ + j * a_dim1] = 0.; } if (i__ != j) { if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.; } if (unit) { a[j + j * a_dim1] = 1.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { aa[i1 + (j - 1) * *lda] = -1e10; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { aa[i2 + (j - 1) * *lda] = a[i2 + j - *ku - 1 + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { aa[i3 + (j - 1) * *lda] = -1e10; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L120: */ } /* L130: */ } } else if (s_cmp(type__, "SB", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j - kk + j * a_dim1]; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L160: */ } /* L170: */ } } else if (s_cmp(type__, "SP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; aa[ioff] = a[i__ + j * a_dim1]; if (i__ == j) { if (unit) { aa[ioff] = -1e10; } } /* L180: */ } /* L190: */ } } return 0; /* End of DMAKE. */ } /* dmake_ */ /* Subroutine */ int dmvch_(char *trans, integer *m, integer *n, doublereal * alpha, doublereal *a, integer *nmax, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy, doublereal *yt, doublereal *g, doublereal *yy, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; doublereal erri; logical tran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___425 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___426 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___427 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; --yt; --g; --yy; /* Function Body */ tran = *(unsigned char *)trans == 'T' || *(unsigned char *)trans == 'C'; if (tran) { ml = *n; nl = *m; } else { ml = *m; nl = *n; } if (*incx < 0) { kx = nl; incxl = -1; } else { kx = 1; incxl = 1; } if (*incy < 0) { ky = ml; incyl = -1; } else { ky = 1; incyl = 1; } /* Compute expected result in YT using data in A, X and Y. */ /* Compute gauges in G. */ iy = ky; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { yt[iy] = 0.; g[iy] = 0.; jx = kx; if (tran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[j + i__ * a_dim1] * x[jx]; g[iy] += (d__1 = a[j + i__ * a_dim1] * x[jx], abs(d__1)); jx += incxl; /* L10: */ } } else { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[i__ + j * a_dim1] * x[jx]; g[iy] += (d__1 = a[i__ + j * a_dim1] * x[jx], abs(d__1)); jx += incxl; /* L20: */ } } yt[iy] = *alpha * yt[iy] + *beta * y[iy]; g[iy] = abs(*alpha) * g[iy] + (d__1 = *beta * y[iy], abs(d__1)); iy += incyl; /* L30: */ } /* Compute the error ratio for this result. */ *err = 0.; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { erri = (d__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(d__1)) / *eps; if (g[i__] != 0.) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.) { goto L50; } /* L40: */ } /* If the loop completes, all results are at least half accurate. */ goto L70; /* Report fatal error. */ L50: *fatal = TRUE_; io___425.ciunit = *nout; s_wsfe(&io___425); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___426.ciunit = *nout; s_wsfe(&io___426); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); e_wsfe(); } else { io___427.ciunit = *nout; s_wsfe(&io___427); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L60: */ } L70: return 0; /* End of DMVCH. */ } /* dmvch_ */ logical lde_(doublereal *ri, doublereal *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LDE. */ } /* lde_ */ logical lderes_(char *type__, char *uplo, integer *m, integer *n, doublereal * aa, doublereal *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'SY' or 'SP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LDERES. */ } /* lderes_ */ doublereal dbeg_(logical *reset) { /* System generated locals */ doublereal ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. */ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (doublereal) (i__ - 500) / 1001.; return ret_val; /* End of DBEG. */ } /* dbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. */ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___437 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___437.ciunit = *nout; s_wsfe(&io___437); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___438 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___439 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___440 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___438.ciunit = infoc_2.nout; s_wsfe(&io___438); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___439.ciunit = infoc_2.nout; s_wsfe(&io___439); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___440.ciunit = infoc_2.nout; s_wsfe(&io___440); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int dblat2_ () { main (); return 0; } blis-1.1/blastest/src/dblat3.c000066400000000000000000004360551474157777200162730ustar00rootroot00000000000000/* dblat3.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static doublereal c_b86 = 0.; static doublereal c_b96 = 1.; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; static integer c__2 = 2; /* > \brief \b DBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM DBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the DOUBLE PRECISION Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 6 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 20 lines: */ /* > 'dblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 1.3 VALUES OF BETA */ /* > DGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup double_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "dblat3"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*6] = "DGEMM " "DSYMM " "DTRMM " "DTRSM " "DSYRK " "DSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE DOUBLE PRECISION LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9992[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9989[] = "(\002 ERROR IN DMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 DMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublereal c__[4225] /* was [65][65] */, g[65]; integer i__, j, n; doublereal w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[4225], as[4225], bs[4225], cs[4225], ct[65], alf[7]; extern logical lde_(doublereal *, doublereal *, integer *); doublereal bet[7], eps, err; integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk5_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchke_(integer *, char *, integer *, ftnlen); logical fatal; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical trace; integer nidim; char snaps[32]; integer isnum; logical ltest[6], sfatal; char snamet[6], transa[1], transb[1]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Report values of parameters. */ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 6; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 6; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b86); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of DMMCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; ab[i__ + j * 65 - 66] = (doublereal) max(i__3,0); /* L90: */ } ab[j + 4224] = (doublereal) j; ab[(j + 65) * 65 - 65] = (doublereal) j; c__[j - 1] = 0.; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { cc[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L110: */ } /* CC holds the exact result. On exit from DMMCH CT holds */ /* the result computed by DMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], & c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lde_(cc, ct, &n); if (! same || err != 0.) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'T'; dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], & c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lde_(cc, ct, &n); if (! same || err != 0.) { io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { ab[j + 4224] = (doublereal) (n - j + 1); ab[(j + 65) * 65 - 65] = (doublereal) (n - j + 1); /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { cc[n - j] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L130: */ } *(unsigned char *)transa = 'T'; *(unsigned char *)transb = 'N'; dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], & c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lde_(cc, ct, &n); if (! same || err != 0.) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'T'; dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], & c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lde_(cc, ct, &n); if (! same || err != 0.) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 6; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { dchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L160; case 4: goto L160; case 5: goto L170; case 6: goto L180; } /* Test DGEMM, 01. */ L140: dchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test DSYMM, 02. */ L150: dchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test DTRMM, 03, DTRSM, 04. */ L160: dchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test DSYRK, 05. */ L170: dchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test DSYR2K, 06. */ L180: dchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; L190: if (fatal && sfatal) { goto L210; } } /* L200: */ } io___78.ciunit = nout; s_wsfe(&io___78); e_wsfe(); goto L230; L210: io___79.ciunit = nout; s_wsfe(&io___79); e_wsfe(); goto L230; L220: io___80.ciunit = nout; s_wsfe(&io___80); e_wsfe(); L230: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of DBLAT3. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *nmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002" ",i3,\002,\002,f4.1,\002, \002,\002C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err, beta; integer ldas, ldbs, ldcs; logical same, null; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen), dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); logical isame[13], trana, tranb; integer nargs; logical reset; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char tranas[1], tranbs[1], transa[1], transb[1]; doublereal errmax; /* Fortran I/O blocks */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ dmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ dmake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b86, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ dmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als = alpha; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bls = beta; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als == alpha; isame[6] = lde_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lde_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls == beta; if (null) { isame[11] = lde_(&cs[1], &cc[1], &lcc); } else { isame[11] = lderes_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ dmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result. */ if (errmax < *thresh) { io___130.ciunit = *nout; s_wsfe(&io___130); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___131.ciunit = *nout; s_wsfe(&io___131); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___132.ciunit = *nout; s_wsfe(&io___132); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___133.ciunit = *nout; s_wsfe(&io___133); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of DCHK1. */ } /* dchk1_ */ /* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *nmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ichs[2] = "LR"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lde_(doublereal *, doublereal *, integer *); integer ics; doublereal als, bls; integer icu; doublereal err, beta; integer ldas, ldbs, ldcs; logical same; char side[1]; logical left, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char uplos[1]; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax; /* Fortran I/O blocks */ static cilist io___171 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___172 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___175 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___177 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___178 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___179 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___180 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DSYMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 12; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L90; } lcc = ldc * n; null = n <= 0 || m <= 0; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L90; } lbb = ldb * n; /* Generate the matrix B. */ dmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, & reset, &c_b86, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; /* Generate the symmetric matrix A. */ dmake_("SY", uplo, " ", &na, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ dmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *)side; *(unsigned char *)uplos = *(unsigned char *)uplo; ms = m; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bls = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___171.ciunit = *ntra; s_wsfe(&io___171); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsymm_(side, uplo, &m, &n, &alpha, &aa[1], &lda, & bb[1], &ldb, &beta, &cc[1], &ldc, (ftnlen) 1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___172.ciunit = *nout; s_wsfe(&io___172); e_wsfe(); *fatal = TRUE_; goto L110; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *(unsigned char *)side; isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; isame[4] = als == alpha; isame[5] = lde_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lde_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bls == beta; if (null) { isame[10] = lde_(&cs[1], &cc[1], &lcc); } else { isame[10] = lderes_("GE", " ", &m, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___175.ciunit = *nout; s_wsfe(&io___175); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L110; } if (! null) { /* Check the result. */ if (left) { dmmch_("N", "N", &m, &n, &m, &alpha, &a[ a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { dmmch_("N", "N", &m, &n, &n, &alpha, &b[ b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } L90: ; } /* L100: */ } /* Report result. */ if (errmax < *thresh) { io___177.ciunit = *nout; s_wsfe(&io___177); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___178.ciunit = *nout; s_wsfe(&io___178); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L120; L110: io___179.ciunit = *nout; s_wsfe(&io___179); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___180.ciunit = *nout; s_wsfe(&io___180); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L120: return 0; /* End of DCHK2. */ } /* dchk2_ */ /* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *nmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, doublereal *bs, doublereal *ct, doublereal *g, doublereal *c__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; static char ichs[2] = "LR"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002) .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb; extern logical lde_(doublereal *, doublereal *, integer *); integer ics; doublereal als; integer ict, icu; doublereal err; char diag[1]; integer ldas, ldbs; logical same; char side[1]; logical left, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; char diags[1]; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; char sides[1]; integer nargs; logical reset; extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dtrsm_( char *, char *, char *, char *, integer *, integer *, doublereal * , doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char uplos[1]; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char tranas[1], transa[1]; doublereal errmax; /* Fortran I/O blocks */ static cilist io___221 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___222 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___223 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___226 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___228 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___229 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___230 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___231 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DTRMM and DTRSM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --g; --ct; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 11; nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero matrix for DMMCH. */ i__1 = *nmax; for (j = 1; j <= i__1; ++j) { i__2 = *nmax; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ } i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L130; } lbb = ldb * n; null = m <= 0 || n <= 0; for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L130; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)transa = *(unsigned char *)&icht[ ict - 1]; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[ icd - 1]; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; /* Generate the matrix A. */ dmake_("TR", uplo, diag, &na, &na, &a[ a_offset], nmax, &aa[1], &lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); /* Generate the matrix B. */ dmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *) side; *(unsigned char *)uplos = *(unsigned char *) uplo; *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)diags = *(unsigned char *) diag; ms = m; ns = n; als = alpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L30: */ } ldas = lda; i__4 = lbb; for (i__ = 1; i__ <= i__4; ++i__) { bs[i__] = bb[i__]; /* L40: */ } ldbs = ldb; /* Call the subroutine. */ if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen) 2) == 0) { if (*trace) { io___221.ciunit = *ntra; s_wsfe(&io___221); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { if (*trace) { io___222.ciunit = *ntra; s_wsfe(&io___222); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___223.ciunit = *nout; s_wsfe(&io___223); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *( unsigned char *)side; isame[1] = *(unsigned char *)uplos == *( unsigned char *)uplo; isame[2] = *(unsigned char *)tranas == *( unsigned char *)transa; isame[3] = *(unsigned char *)diags == *( unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; isame[6] = als == alpha; isame[7] = lde_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; if (null) { isame[9] = lde_(&bs[1], &bb[1], &lbb); } else { isame[9] = lderes_("GE", " ", &m, &n, &bs[ 1], &bb[1], &ldb, (ftnlen)2, ( ftnlen)1); } isame[10] = ldbs == ldb; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___226.ciunit = *nout; s_wsfe(&io___226); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L50: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { if (s_cmp(sname + 3, "MM", (ftnlen)2, ( ftnlen)2) == 0) { /* Check the result. */ if (left) { dmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & c_b86, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { dmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & c_b86, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } } else if (s_cmp(sname + 3, "SM", (ftnlen) 2, (ftnlen)2) == 0) { /* Compute approximation to original */ /* matrix. */ i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { c__[i__ + j * c_dim1] = bb[i__ + (j - 1) * ldb]; bb[i__ + (j - 1) * ldb] = alpha * b[i__ + j * b_dim1]; /* L60: */ } /* L70: */ } if (left) { dmmch_(transa, "N", &m, &n, &m, & c_b96, &a[a_offset], nmax, &c__[c_offset], nmax, & c_b86, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { dmmch_("N", transa, &m, &n, &n, & c_b96, &c__[c_offset], nmax, &a[a_offset], nmax, &c_b86, &b[b_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_false, ( ftnlen)1, (ftnlen)1); } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L150; } } /* L80: */ } /* L90: */ } /* L100: */ } /* L110: */ } /* L120: */ } L130: ; } /* L140: */ } /* Report result. */ if (errmax < *thresh) { io___228.ciunit = *nout; s_wsfe(&io___228); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___229.ciunit = *nout; s_wsfe(&io___229); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L160; L150: io___230.ciunit = *nout; s_wsfe(&io___230); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___231.ciunit = *nout; s_wsfe(&io___231); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of DCHK3. */ } /* dchk3_ */ /* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *nmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char icht[3] = "NTC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1," "\002, C,\002,i3,\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lda, lcc, ldc; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als; integer ict, icu; doublereal err, beta; integer ldas, ldcs; logical same; doublereal bets; logical tran, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; integer nargs; logical reset; char trans[1]; logical upper; extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char uplos[1]; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax; char transs[1]; /* Fortran I/O blocks */ static cilist io___268 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___269 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___272 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___278 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___279 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___280 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___281 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___282 = { 0, 0, 0, fmt_9994, 0 }; /* Tests DSYRK. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 10; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char *) trans == 'C'; if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ dmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], & lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ dmake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; bets = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L20: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___268.ciunit = *ntra; s_wsfe(&io___268); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &beta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1) ; /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___269.ciunit = *nout; s_wsfe(&io___269); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als == alpha; isame[5] = lde_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = bets == beta; if (null) { isame[8] = lde_(&cs[1], &cc[1], &lcc); } else { isame[8] = lderes_("SY", uplo, &n, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[9] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___272.ciunit = *nout; s_wsfe(&io___272); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { dmmch_("T", "N", &lj, &c__1, &k, & alpha, &a[jj * a_dim1 + 1], nmax, &a[j * a_dim1 + 1], nmax, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { dmmch_("N", "T", &lj, &c__1, &k, & alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } /* L40: */ } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___278.ciunit = *nout; s_wsfe(&io___278); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___279.ciunit = *nout; s_wsfe(&io___279); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L110: if (n > 1) { io___280.ciunit = *nout; s_wsfe(&io___280); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L120: io___281.ciunit = *nout; s_wsfe(&io___281); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___282.ciunit = *nout; s_wsfe(&io___282); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of DCHK4. */ } /* dchk4_ */ /* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *nmax, doublereal *ab, doublereal *aa, doublereal *as, doublereal *bb, doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, doublereal *w, ftnlen sname_len) { /* Initialized data */ static char icht[3] = "NTC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als; integer ict, icu; doublereal err; integer jjab; doublereal beta; integer ldas, ldbs, ldcs; logical same; doublereal bets; logical tran, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical isame[13]; integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int dsyr2k_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax; char transs[1]; /* Fortran I/O blocks */ static cilist io___322 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___323 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___326 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___333 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___334 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___337 = { 0, 0, 0, fmt_9994, 0 }; /* Tests DSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ nargs = 12; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; null = n <= 0; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char *) trans == 'C'; if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ if (tran) { i__3 = *nmax << 1; dmake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { dmake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } /* Generate the matrix B. */ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; dmake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { dmake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b86, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ dmake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bets = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___322.ciunit = *ntra; s_wsfe(&io___322); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &bb[1], &ldb, &beta, &cc[1], &ldc, ( ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___323.ciunit = *nout; s_wsfe(&io___323); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als == alpha; isame[5] = lde_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lde_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bets == beta; if (null) { isame[10] = lde_(&cs[1], &cc[1], &lcc); } else { isame[10] = lderes_("SY", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___326.ciunit = *nout; s_wsfe(&io___326); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. */ jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { w[i__] = ab[(j - 1 << 1) * *nmax + k + i__]; w[k + i__] = ab[(j - 1 << 1) * * nmax + i__]; /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; dmmch_("T", "N", &lj, &c__1, &i__6, & alpha, &ab[jjab], &i__7, &w[1] , &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { w[i__] = ab[(k + i__ - 1) * *nmax + j]; w[k + i__] = ab[(i__ - 1) * *nmax + j]; /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; dmmch_("N", "N", &lj, &c__1, &i__6, & alpha, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ if (errmax < *thresh) { io___333.ciunit = *nout; s_wsfe(&io___333); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___334.ciunit = *nout; s_wsfe(&io___334); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L160; L140: if (n > 1) { io___335.ciunit = *nout; s_wsfe(&io___335); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: io___336.ciunit = *nout; s_wsfe(&io___336); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___337.ciunit = *nout; s_wsfe(&io___337); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of DCHK5. */ } /* dchk5_ */ /* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublereal a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), dsyr2k_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___343 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___344 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA and BETA (eca) */ /* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA and BETA. */ alpha = 1.; beta = 2.; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; } L10: infoc_1.infot = 1; dgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; dgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L20: infoc_1.infot = 1; dsymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L30: infoc_1.infot = 1; dtrmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L40: infoc_1.infot = 1; dtrsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L50: infoc_1.infot = 1; dsyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyrk_("U", "/", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L60: infoc_1.infot = 1; dsyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyr2k_("U", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L70: if (infoc_1.ok) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___344.ciunit = *nout; s_wsfe(&io___344); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of DCHKE. */ } /* dchke_ */ /* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *nmax, doublereal *aa, integer * lda, logical *reset, doublereal *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j; logical gen, tri, sym; extern doublereal dbeg_(logical *); integer ibeg, iend; logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { a[i__ + j * a_dim1] = dbeg_(reset) + *transl; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { a[i__ + j * a_dim1] = 0.; } if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.; } if (unit) { a[j + j * a_dim1] = 1.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L80: */ } /* L90: */ } } return 0; /* End of DMAKE. */ } /* dmake_ */ /* Subroutine */ int dmmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, doublereal *ct, doublereal *g, doublereal *cc, integer * ldcc, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; doublereal erri; logical trana, tranb; /* Fortran I/O blocks */ static cilist io___361 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___362 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___363 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___364 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { ct[i__] = 0.; g[i__] = 0.; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[k + j * b_dim1]; g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = b[k + j * b_dim1], abs(d__2)); /* L20: */ } /* L30: */ } } else if (trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[k + j * b_dim1]; g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 = b[k + j * b_dim1], abs(d__2)); /* L40: */ } /* L50: */ } } else if (! trana && tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[j + k * b_dim1]; g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = b[j + k * b_dim1], abs(d__2)); /* L60: */ } /* L70: */ } } else if (trana && tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[j + k * b_dim1]; g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 = b[j + k * b_dim1], abs(d__2)); /* L80: */ } /* L90: */ } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { ct[i__] = *alpha * ct[i__] + *beta * c__[i__ + j * c_dim1]; g[i__] = abs(*alpha) * g[i__] + abs(*beta) * (d__1 = c__[i__ + j * c_dim1], abs(d__1)); /* L100: */ } /* Compute the error ratio for this result. */ *err = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { erri = (d__1 = ct[i__] - cc[i__ + j * cc_dim1], abs(d__1)) / *eps; if (g[i__] != 0.) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.) { goto L130; } /* L110: */ } /* L120: */ } /* If the loop completes, all results are at least half accurate. */ goto L150; /* Report fatal error. */ L130: *fatal = TRUE_; io___361.ciunit = *nout; s_wsfe(&io___361); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___362.ciunit = *nout; s_wsfe(&io___362); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); e_wsfe(); } else { io___363.ciunit = *nout; s_wsfe(&io___363); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L140: */ } if (*n > 1) { io___364.ciunit = *nout; s_wsfe(&io___364); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: return 0; /* End of DMMCH. */ } /* dmmch_ */ logical lde_(doublereal *ri, doublereal *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LDE. */ } /* lde_ */ logical lderes_(char *type__, char *uplo, integer *m, integer *n, doublereal * aa, doublereal *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LDERES. */ } /* lderes_ */ doublereal dbeg_(logical *reset) { /* System generated locals */ doublereal ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. */ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (i__ - 500) / 1001.; return ret_val; /* End of DBEG. */ } /* dbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. */ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___374 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___374.ciunit = *nout; s_wsfe(&io___374); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___375.ciunit = infoc_2.nout; s_wsfe(&io___375); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___376.ciunit = infoc_2.nout; s_wsfe(&io___376); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___377.ciunit = infoc_2.nout; s_wsfe(&io___377); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int dblat3_ () { main (); return 0; } blis-1.1/blastest/src/fortran/000077500000000000000000000000001474157777200164145ustar00rootroot00000000000000blis-1.1/blastest/src/fortran/cblat1.f000066400000000000000000000765551474157777200177530ustar00rootroot00000000000000*> \brief \b CBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM CBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX Level 1 BLAS. *> Based upon the original BLAS test routine together with: *> *> F06GAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex_blas_testing * * ===================================================================== PROGRAM CBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. REAL SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK1, CHECK2, HEADER * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SFAC/9.765625E-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 10 ICASE = IC CALL HEADER * * Initialize PASS, INCX, INCY, and MODE for a new case. * The value 9999 for INCX, INCY or MODE will appear in the * detailed output, if any, for cases that do not involve * these parameters. * PASS = .TRUE. INCX = 9999 INCY = 9999 MODE = 9999 IF (ICASE.LE.5) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.GE.6) THEN CALL CHECK1(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Complex BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(10) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA L(1)/'CDOTC '/ DATA L(2)/'CDOTU '/ DATA L(3)/'CAXPY '/ DATA L(4)/'CCOPY '/ DATA L(5)/'CSWAP '/ DATA L(6)/'SCNRM2'/ DATA L(7)/'SCASUM'/ DATA L(8)/'CSCAL '/ DATA L(9)/'CSSCAL'/ DATA L(10)/'ICAMAX'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX CA REAL SA INTEGER I, J, LEN, NP1 * .. Local Arrays .. COMPLEX CTRUE5(8,5,2), CTRUE6(8,5,2), CV(8,5,2), CX(8), + MWPCS(5), MWPCT(5) REAL STRUE2(5), STRUE4(5) INTEGER ITRUE3(5) * .. External Functions .. REAL SCASUM, SCNRM2 INTEGER ICAMAX EXTERNAL SCASUM, SCNRM2, ICAMAX * .. External Subroutines .. EXTERNAL CSCAL, CSSCAL, CTEST, ITEST1, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SA, CA/0.3E0, (0.4E0,-0.7E0)/ DATA ((CV(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (0.3E0,-0.4E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (0.1E0,-0.3E0), (0.5E0,-0.1E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0), + (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0), + (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (0.3E0,-0.4E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (0.1E0,-0.3E0), (8.0E0,9.0E0), (0.5E0,-0.1E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (0.1E0,0.1E0), + (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0), + (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0), + (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0), + (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/ DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/ DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (-0.16E0,-0.37E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (-0.17E0,-0.19E0), (0.13E0,-0.39E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (0.11E0,-0.03E0), (-0.17E0,0.46E0), + (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (0.19E0,-0.17E0), (0.20E0,-0.35E0), + (0.35E0,0.20E0), (0.14E0,0.08E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0), + (2.0E0,3.0E0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (-0.16E0,-0.37E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (-0.17E0,-0.19E0), (8.0E0,9.0E0), + (0.13E0,-0.39E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (0.11E0,-0.03E0), (3.0E0,6.0E0), + (-0.17E0,0.46E0), (4.0E0,7.0E0), + (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0), + (0.20E0,-0.35E0), (6.0E0,9.0E0), + (0.35E0,0.20E0), (8.0E0,3.0E0), + (0.14E0,0.08E0), (9.0E0,4.0E0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (0.09E0,-0.12E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (0.03E0,-0.09E0), (0.15E0,-0.03E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (0.03E0,0.03E0), (-0.18E0,0.03E0), + (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (0.09E0,0.03E0), (0.15E0,0.00E0), + (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (0.09E0,-0.12E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (0.03E0,-0.09E0), (8.0E0,9.0E0), + (0.15E0,-0.03E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (0.03E0,0.03E0), (3.0E0,6.0E0), + (-0.18E0,0.03E0), (4.0E0,7.0E0), + (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0), + (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0), + (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 DO 40 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN CX(I) = CV(I,NP1,INCX) 20 CONTINUE IF (ICASE.EQ.6) THEN * .. SCNRM2 .. CALL STEST1(SCNRM2(N,CX,INCX),STRUE2(NP1),STRUE2(NP1), + SFAC) ELSE IF (ICASE.EQ.7) THEN * .. SCASUM .. CALL STEST1(SCASUM(N,CX,INCX),STRUE4(NP1),STRUE4(NP1), + SFAC) ELSE IF (ICASE.EQ.8) THEN * .. CSCAL .. CALL CSCAL(N,CA,CX,INCX) CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.9) THEN * .. CSSCAL .. CALL CSSCAL(N,SA,CX,INCX) CALL CTEST(LEN,CX,CTRUE6(1,NP1,INCX),CTRUE6(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.10) THEN * .. ICAMAX .. CALL ITEST1(ICAMAX(N,CX,INCX),ITRUE3(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF * 40 CONTINUE 60 CONTINUE * INCX = 1 IF (ICASE.EQ.8) THEN * CSCAL * Add a test for alpha equal to zero. CA = (0.0E0,0.0E0) DO 80 I = 1, 5 MWPCT(I) = (0.0E0,0.0E0) MWPCS(I) = (1.0E0,1.0E0) 80 CONTINUE CALL CSCAL(5,CA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) ELSE IF (ICASE.EQ.9) THEN * CSSCAL * Add a test for alpha equal to zero. SA = 0.0E0 DO 100 I = 1, 5 MWPCT(I) = (0.0E0,0.0E0) MWPCS(I) = (1.0E0,1.0E0) 100 CONTINUE CALL CSSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to one. SA = 1.0E0 DO 120 I = 1, 5 MWPCT(I) = CX(I) MWPCS(I) = CX(I) 120 CONTINUE CALL CSSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to minus one. SA = -1.0E0 DO 140 I = 1, 5 MWPCT(I) = -CX(I) MWPCS(I) = -CX(I) 140 CONTINUE CALL CSSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) END IF RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX CA INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. COMPLEX CDOT(1), CSIZE1(4), CSIZE2(7,2), CSIZE3(14), + CT10X(7,4,4), CT10Y(7,4,4), CT6(4,4), CT7(4,4), + CT8(7,4,4), CX(7), CX1(7), CY(7), CY1(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. COMPLEX CDOTC, CDOTU EXTERNAL CDOTC, CDOTU * .. External Subroutines .. EXTERNAL CAXPY, CCOPY, CSWAP, CTEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA CA/(0.4E0,-0.7E0)/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA CX1/(0.7E0,-0.8E0), (-0.4E0,-0.7E0), + (-0.1E0,-0.9E0), (0.2E0,-0.8E0), + (-0.9E0,-0.4E0), (0.1E0,0.4E0), (-0.6E0,0.6E0)/ DATA CY1/(0.6E0,-0.6E0), (-0.9E0,0.5E0), + (0.7E0,-0.6E0), (0.1E0,-0.5E0), (-0.1E0,-0.2E0), + (-0.5E0,-0.3E0), (0.8E0,-0.7E0)/ DATA ((CT8(I,J,1),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.32E0,-1.41E0), + (-1.55E0,0.5E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (-1.55E0,0.5E0), + (0.03E0,-0.89E0), (-0.38E0,-0.96E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT8(I,J,2),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.07E0,-0.89E0), + (-0.9E0,0.5E0), (0.42E0,-1.41E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.78E0,0.06E0), (-0.9E0,0.5E0), + (0.06E0,-0.13E0), (0.1E0,-0.5E0), + (-0.77E0,-0.49E0), (-0.5E0,-0.3E0), + (0.52E0,-1.51E0)/ DATA ((CT8(I,J,3),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.07E0,-0.89E0), + (-1.18E0,-0.31E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.78E0,0.06E0), (-1.54E0,0.97E0), + (0.03E0,-0.89E0), (-0.18E0,-1.31E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT8(I,J,4),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.32E0,-1.41E0), (-0.9E0,0.5E0), + (0.05E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.32E0,-1.41E0), + (-0.9E0,0.5E0), (0.05E0,-0.6E0), (0.1E0,-0.5E0), + (-0.77E0,-0.49E0), (-0.5E0,-0.3E0), + (0.32E0,-1.16E0)/ DATA CT7/(0.0E0,0.0E0), (-0.06E0,-0.90E0), + (0.65E0,-0.47E0), (-0.34E0,-1.22E0), + (0.0E0,0.0E0), (-0.06E0,-0.90E0), + (-0.59E0,-1.46E0), (-1.04E0,-0.04E0), + (0.0E0,0.0E0), (-0.06E0,-0.90E0), + (-0.83E0,0.59E0), (0.07E0,-0.37E0), + (0.0E0,0.0E0), (-0.06E0,-0.90E0), + (-0.76E0,-1.15E0), (-1.33E0,-1.82E0)/ DATA CT6/(0.0E0,0.0E0), (0.90E0,0.06E0), + (0.91E0,-0.77E0), (1.80E0,-0.10E0), + (0.0E0,0.0E0), (0.90E0,0.06E0), (1.45E0,0.74E0), + (0.20E0,0.90E0), (0.0E0,0.0E0), (0.90E0,0.06E0), + (-0.55E0,0.23E0), (0.83E0,-0.39E0), + (0.0E0,0.0E0), (0.90E0,0.06E0), (1.04E0,0.79E0), + (1.95E0,1.22E0)/ DATA ((CT10X(I,J,1),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.6E0,-0.6E0), (-0.9E0,0.5E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.6E0,-0.6E0), + (-0.9E0,0.5E0), (0.7E0,-0.6E0), (0.1E0,-0.5E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT10X(I,J,2),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.7E0,-0.6E0), (-0.4E0,-0.7E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.8E0,-0.7E0), + (-0.4E0,-0.7E0), (-0.1E0,-0.2E0), + (0.2E0,-0.8E0), (0.7E0,-0.6E0), (0.1E0,0.4E0), + (0.6E0,-0.6E0)/ DATA ((CT10X(I,J,3),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.9E0,0.5E0), (-0.4E0,-0.7E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.1E0,-0.5E0), + (-0.4E0,-0.7E0), (0.7E0,-0.6E0), (0.2E0,-0.8E0), + (-0.9E0,0.5E0), (0.1E0,0.4E0), (0.6E0,-0.6E0)/ DATA ((CT10X(I,J,4),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.6E0,-0.6E0), (0.7E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.6E0,-0.6E0), + (0.7E0,-0.6E0), (-0.1E0,-0.2E0), (0.8E0,-0.7E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT10Y(I,J,1),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.7E0,-0.8E0), (-0.4E0,-0.7E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.7E0,-0.8E0), + (-0.4E0,-0.7E0), (-0.1E0,-0.9E0), + (0.2E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0)/ DATA ((CT10Y(I,J,2),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.1E0,-0.9E0), (-0.9E0,0.5E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (-0.6E0,0.6E0), + (-0.9E0,0.5E0), (-0.9E0,-0.4E0), (0.1E0,-0.5E0), + (-0.1E0,-0.9E0), (-0.5E0,-0.3E0), + (0.7E0,-0.8E0)/ DATA ((CT10Y(I,J,3),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.1E0,-0.9E0), (0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (-0.6E0,0.6E0), + (-0.9E0,-0.4E0), (-0.1E0,-0.9E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0)/ DATA ((CT10Y(I,J,4),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.7E0,-0.8E0), (-0.9E0,0.5E0), + (-0.4E0,-0.7E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.7E0,-0.8E0), + (-0.9E0,0.5E0), (-0.4E0,-0.7E0), (0.1E0,-0.5E0), + (-0.1E0,-0.9E0), (-0.5E0,-0.3E0), + (0.2E0,-0.8E0)/ DATA CSIZE1/(0.0E0,0.0E0), (0.9E0,0.9E0), + (1.63E0,1.73E0), (2.90E0,2.78E0)/ DATA CSIZE3/(0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (1.17E0,1.17E0), + (1.17E0,1.17E0), (1.17E0,1.17E0), + (1.17E0,1.17E0), (1.17E0,1.17E0), + (1.17E0,1.17E0), (1.17E0,1.17E0)/ DATA CSIZE2/(0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (1.54E0,1.54E0), + (1.54E0,1.54E0), (1.54E0,1.54E0), + (1.54E0,1.54E0), (1.54E0,1.54E0), + (1.54E0,1.54E0), (1.54E0,1.54E0)/ * .. Executable Statements .. DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. initialize all argument arrays .. DO 20 I = 1, 7 CX(I) = CX1(I) CY(I) = CY1(I) 20 CONTINUE IF (ICASE.EQ.1) THEN * .. CDOTC .. CDOT(1) = CDOTC(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT6(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.2) THEN * .. CDOTU .. CDOT(1) = CDOTU(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT7(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.3) THEN * .. CAXPY .. CALL CAXPY(N,CA,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT8(1,KN,KI),CSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.4) THEN * .. CCOPY .. CALL CCOPY(N,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0) ELSE IF (ICASE.EQ.5) THEN * .. CSWAP .. CALL CSWAP(N,CX,INCX,CY,INCY) CALL CTEST(LENX,CX,CT10X(1,KN,KI),CSIZE3,1.0E0) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF * 40 CONTINUE 60 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT REAL ZERO PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. REAL SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. REAL SD INTEGER I * .. External Functions .. REAL SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,3I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. REAL SCOMP1, SFAC, STRUE1 * .. Array Arguments .. REAL SSIZE(*) * .. Local Arrays .. REAL SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END REAL FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. REAL SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE CTEST(LEN,CCOMP,CTRUE,CSIZE,SFAC) * **************************** CTEST ***************************** * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. COMPLEX CCOMP(LEN), CSIZE(LEN), CTRUE(LEN) * .. Local Scalars .. INTEGER I * .. Local Arrays .. REAL SCOMP(20), SSIZE(20), STRUE(20) * .. External Subroutines .. EXTERNAL STEST * .. Intrinsic Functions .. INTRINSIC AIMAG, REAL * .. Executable Statements .. DO 20 I = 1, LEN SCOMP(2*I-1) = REAL(CCOMP(I)) SCOMP(2*I) = AIMAG(CCOMP(I)) STRUE(2*I-1) = REAL(CTRUE(I)) STRUE(2*I) = AIMAG(CTRUE(I)) SSIZE(2*I-1) = REAL(CSIZE(I)) SSIZE(2*I) = AIMAG(CSIZE(I)) 20 CONTINUE * CALL STEST(2*LEN,SCOMP,STRUE,SSIZE,SFAC) RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,3I5,2I36,I12) END blis-1.1/blastest/src/fortran/cblat2.f000066400000000000000000003436611474157777200177470ustar00rootroot00000000000000*> \brief \b CBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM CBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 17 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 35 lines: *> 'cblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> CGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> CGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> CHEMV T PUT F FOR NO TEST. SAME COLUMNS. *> CHBMV T PUT F FOR NO TEST. SAME COLUMNS. *> CHPMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTRMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTBMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTPMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTRSV T PUT F FOR NO TEST. SAME COLUMNS. *> CTBSV T PUT F FOR NO TEST. SAME COLUMNS. *> CTPSV T PUT F FOR NO TEST. SAME COLUMNS. *> CGERC T PUT F FOR NO TEST. SAME COLUMNS. *> CGERU T PUT F FOR NO TEST. SAME COLUMNS. *> CHER T PUT F FOR NO TEST. SAME COLUMNS. *> CHPR T PUT F FOR NO TEST. SAME COLUMNS. *> CHER2 T PUT F FOR NO TEST. SAME COLUMNS. *> CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. *> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex_blas_testing * * ===================================================================== PROGRAM CBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 17 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LCE EXTERNAL SDIFF, LCE * .. External Subroutines .. EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CCHK6, $ CCHKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'CGEMV ', 'CGBMV ', 'CHEMV ', 'CHBMV ', $ 'CHPMV ', 'CTRMV ', 'CTBMV ', 'CTPMV ', $ 'CTRSV ', 'CTBSV ', 'CTPSV ', 'CGERC ', $ 'CGERU ', 'CHER ', 'CHPR ', 'CHER2 ', $ 'CHPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from CMVCH YT holds * the result computed by CMVCH. TRANS = 'N' CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL CCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 170, 180, $ 180, 190, 190 )ISNUM * Test CGEMV, 01, and CGBMV, 02. 140 CALL CCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test CHEMV, 03, CHBMV, 04, and CHPMV, 05. 150 CALL CCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test CTRMV, 06, CTBMV, 07, CTPMV, 08, * CTRSV, 09, CTBSV, 10, and CTPSV, 11. 160 CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test CGERC, 12, CGERU, 13. 170 CALL CCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test CHER, 14, and CHPR, 15. 180 CALL CCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test CHER2, 16, and CHPR2, 17. 190 CALL CCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE COMPLEX LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 9989 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9988 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN CMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' CMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of CBLAT2. * END SUBROUTINE CCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests CGEMV and CGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BLS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CGBMV, CGEMV, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL CGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL CGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LCE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LCE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LCE( YS, YY, LY ) ELSE ISAME( 10 ) = LCERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LCE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LCE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LCE( YS, YY, LY ) ELSE ISAME( 12 ) = LCERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL CMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK1. * END SUBROUTINE CCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests CHEMV, CHBMV and CHPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BLS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHBMV, CHEMV, CHPMV, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL CHEMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL CHBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL CHPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LCE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LCE( YS, YY, LY ) ELSE ISAME( 9 ) = LCERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LCE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LCE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LCE( YS, YY, LY ) ELSE ISAME( 10 ) = LCERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( AS, AA, LAA ) ISAME( 5 ) = LCE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LCE( YS, YY, LY ) ELSE ISAME( 8 ) = LCERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL CMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), AP, X,', I2, ',(', F4.1, ',', F4.1, '), Y,', I2, $ ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', F4.1, '), ', $ 'Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK2. * END SUBROUTINE CCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests CTRMV, CTBMV, CTPMV, CTRSV, CTBSV and CTPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XT( NMAX ), XX( NMAX*INCMAX ), Z( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX TRANSL REAL ERR, ERRMAX INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CMAKE, CMVCH, CTBMV, CTBSV, CTPMV, CTPSV, $ CTRMV, CTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero vector for CMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL CTPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL CTPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LCE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LCE( XS, XX, LX ) ELSE ISAME( 7 ) = LCERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LCE( XS, XX, LX ) ELSE ISAME( 8 ) = LCERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LCE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LCE( XS, XX, LX ) ELSE ISAME( 6 ) = LCERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL CMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK3. * END SUBROUTINE CCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests CGERC and CGERU. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX ALPHA, ALS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL CONJ, NULL, RESET, SAME * .. Local Arrays .. COMPLEX W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CGERC, CGERU, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. CONJ = SNAME( 5: 5 ).EQ.'C' * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( CONJ )THEN IF( REWI ) $ REWIND NTRA CALL CGERC( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) ELSE IF( REWI ) $ REWIND NTRA CALL CGERU( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LCE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LCE( AS, AA, LAA ) ELSE ISAME( 8 ) = LCERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF IF( CONJ ) $ W( 1 ) = CONJG( W( 1 ) ) CALL CMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), '(', F4.1, ',', F4.1, $ '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK4. * END SUBROUTINE CCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests CHER and CHPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX ALPHA, TRANSL REAL ERR, ERRMAX, RALPHA, RALS INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHER, CHPR, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, CONJG, MAX, REAL * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF RALPHA = REAL( ALF( IA ) ) ALPHA = CMPLX( RALPHA, RZERO ) NULL = N.LE.0.OR.RALPHA.EQ.RZERO * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N RALS = RALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ RALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL CHER( UPLO, N, RALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ RALPHA, INCX IF( REWI ) $ REWIND NTRA CALL CHPR( UPLO, N, RALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = RALS.EQ.RALPHA ISAME( 4 ) = LCE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LCE( AS, AA, LAA ) ELSE ISAME( 6 ) = LCERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = CONJG( Z( J ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL CMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, RALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, RALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK5. * END SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests CHER2 and CHPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX ALPHA, ALS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHER2, CHPR2, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL CHER2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL CHPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LCE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LCE( AS, AA, LAA ) ELSE ISAME( 8 ) = LCERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = ALPHA*CONJG( Z( J, 2 ) ) W( 2 ) = CONJG( ALPHA )*CONJG( Z( J, 1 ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL CMVCH( 'N', LJ, 2, ONE, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', AP) ', $ ' .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK6. * END SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA * .. Local Arrays .. COMPLEX A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. EXTERNAL CGBMV, CGEMV, CGERC, CGERU, CHBMV, CHEMV, CHER, $ CHER2, CHKXER, CHPMV, CHPR, CHPR2, CTBMV, $ CTBSV, CTPMV, CTPSV, CTRMV, CTRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, 160, $ 170 )ISNUM 10 INFOT = 1 CALL CGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 20 INFOT = 1 CALL CGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 30 INFOT = 1 CALL CHEMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHEMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHEMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHEMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 40 INFOT = 1 CALL CHBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CHBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CHBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CHBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 50 INFOT = 1 CALL CHPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CHPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 60 INFOT = 1 CALL CTRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CTRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 70 INFOT = 1 CALL CTBMV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 80 INFOT = 1 CALL CTPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 90 INFOT = 1 CALL CTRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRSV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CTRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 100 INFOT = 1 CALL CTBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 110 INFOT = 1 CALL CTPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 120 INFOT = 1 CALL CGERC( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGERC( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGERC( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CGERC( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CGERC( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 130 INFOT = 1 CALL CGERU( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGERU( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGERU( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CGERU( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CGERU( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 140 INFOT = 1 CALL CHER( '/', 0, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHER( 'U', -1, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHER( 'U', 0, RALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER( 'U', 2, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 150 INFOT = 1 CALL CHPR( '/', 0, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHPR( 'U', -1, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHPR( 'U', 0, RALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 160 INFOT = 1 CALL CHER2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHER2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHER2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 170 INFOT = 1 CALL CHPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 180 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of CCHKE. * END SUBROUTINE CMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) COMPLEX ROGUE PARAMETER ( ROGUE = ( -1.0E10, 1.0E10 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) REAL RROGUE PARAMETER ( RROGUE = -1.0E10 ) * .. Scalar Arguments .. COMPLEX TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, JJ, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. COMPLEX CBEG EXTERNAL CBEG * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MIN, REAL * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'H' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. $ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = CBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = CONJG( A( I, J ) ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( SYM ) $ A( J, J ) = CMPLX( REAL( A( J, J ) ), RZERO ) IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE IF( SYM )THEN JJ = J + ( J - 1 )*LDA AA( JJ ) = CMPLX( REAL( AA( JJ ) ), RROGUE ) END IF 130 CONTINUE ELSE IF( TYPE.EQ.'HB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE IF( SYM )THEN JJ = KK + ( J - 1 )*LDA AA( JJ ) = CMPLX( REAL( AA( JJ ) ), RROGUE ) END IF 170 CONTINUE ELSE IF( TYPE.EQ.'HP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE IF( SYM ) $ AA( IOFF ) = CMPLX( REAL( AA( IOFF ) ), RROGUE ) END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of CMAKE. * END SUBROUTINE CMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0, RONE = 1.0 ) * .. Scalar Arguments .. COMPLEX ALPHA, BETA REAL EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. COMPLEX A( NMAX, * ), X( * ), Y( * ), YT( * ), YY( * ) REAL G( * ) * .. Local Scalars .. COMPLEX C REAL ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL CTRAN, TRAN * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, REAL, SQRT * .. Statement Functions .. REAL ABS1 * .. Statement Function definitions .. ABS1( C ) = ABS( REAL( C ) ) + ABS( AIMAG( C ) ) * .. Executable Statements .. TRAN = TRANS.EQ.'T' CTRAN = TRANS.EQ.'C' IF( TRAN.OR.CTRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. * IY = KY DO 40 I = 1, ML YT( IY ) = ZERO G( IY ) = RZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE IF( CTRAN )THEN DO 20 J = 1, NL YT( IY ) = YT( IY ) + CONJG( A( J, I ) )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 20 CONTINUE ELSE DO 30 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS1( A( I, J ) )*ABS1( X( JX ) ) JX = JX + INCXL 30 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS1( ALPHA )*G( IY ) + ABS1( BETA )*ABS1( Y( IY ) ) IY = IY + INCYL 40 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 50 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.RZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ GO TO 60 50 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 80 * * Report fatal error. * 60 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 70 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT( I ) END IF 70 CONTINUE * 80 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RE', $ 'SULT COMPUTED RESULT' ) 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) * * End of CMVCH. * END LOGICAL FUNCTION LCE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. COMPLEX RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LCE = .TRUE. GO TO 30 20 CONTINUE LCE = .FALSE. 30 RETURN * * End of LCE. * END LOGICAL FUNCTION LCERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'HE' or 'HP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'HE' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LCERES = .TRUE. GO TO 80 70 CONTINUE LCERES = .FALSE. 80 RETURN * * End of LCERES. * END COMPLEX FUNCTION CBEG( RESET ) * * Generates complex numbers as pairs of random numbers uniformly * distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, J, MI, MJ * .. Save statement .. SAVE I, IC, J, MI, MJ * .. Intrinsic Functions .. INTRINSIC CMPLX * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 MJ = 457 I = 7 J = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I or J is bounded between 1 and 999. * If initial I or J = 1,2,3,6,7 or 9, the period will be 50. * If initial I or J = 4 or 8, the period will be 25. * If initial I or J = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I or J * in 6. * IC = IC + 1 10 I = I*MI J = J*MJ I = I - 1000*( I/1000 ) J = J - 1000*( J/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF CBEG = CMPLX( ( I - 500 )/1001.0, ( J - 500 )/1001.0 ) RETURN * * End of CBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/cblat3.f000066400000000000000000004007361474157777200177450ustar00rootroot00000000000000*> \brief \b CBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM CBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 9 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 23 lines: *> 'cblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> CGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> CHEMM T PUT F FOR NO TEST. SAME COLUMNS. *> CSYMM T PUT F FOR NO TEST. SAME COLUMNS. *> CTRMM T PUT F FOR NO TEST. SAME COLUMNS. *> CTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> CHERK T PUT F FOR NO TEST. SAME COLUMNS. *> CSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> CHER2K T PUT F FOR NO TEST. SAME COLUMNS. *> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex_blas_testing * * ===================================================================== PROGRAM CBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. COMPLEX AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ W( 2*NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LCE EXTERNAL SDIFF, LCE * .. External Subroutines .. EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CCHKE, CMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'CGEMM ', 'CHEMM ', 'CSYMM ', 'CTRMM ', $ 'CTRSM ', 'CHERK ', 'CSYRK ', 'CHER2K', $ 'CSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from CMMCH CT holds * the result computed by CMMCH. TRANSA = 'N' TRANSB = 'N' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'C' TRANSB = 'N' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL CCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, $ 180, 180 )ISNUM * Test CGEMM, 01. 140 CALL CCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test CHEMM, 02, CSYMM, 03. 150 CALL CCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test CTRMM, 04, CTRSM, 05. 160 CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test CHERK, 06, CSYRK, 07. 170 CALL CCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test CHER2K, 08, CSYR2K, 09. 180 CALL CCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE COMPLEX LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9992 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN CMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' CMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of CBLAT3. * END SUBROUTINE CCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests CGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BLS REAL ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CGEMM, CMAKE, CMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL CMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL CMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL CMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL CGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LCE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LCE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LCE( CS, CC, LCC ) ELSE ISAME( 12 ) = LCERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL CMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK1. * END SUBROUTINE CCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests CHEMM and CSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BLS REAL ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL CONJ, LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHEMM, CMAKE, CMMCH, CSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL CMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the hermitian or symmetric matrix A. * CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', NA, NA, A, NMAX, $ AA, LDA, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL CMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA IF( CONJ )THEN CALL CHEMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) ELSE CALL CSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LCE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LCE( CS, CC, LCC ) ELSE ISAME( 11 ) = LCERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL CMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL CMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, $ ',', F4.1, '), C,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK2. * END SUBROUTINE CCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests CTRMM and CTRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS REAL ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CMAKE, CMMCH, CTRMM, CTRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero matrix for CMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL CMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL CMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL CTRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL CTRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LCE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LCE( BS, BB, LBB ) ELSE ISAME( 10 ) = LCERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL CMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL CMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. * DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL CMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL CMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK3. * END SUBROUTINE CCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests CHERK and CSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RONE, RZERO PARAMETER ( RONE = 1.0, RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BETS REAL ERR, ERRMAX, RALPHA, RALS, RBETA, RBETS INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL CONJ, NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, TRANST, UPLO, UPLOS CHARACTER*2 ICHT, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHERK, CMAKE, CMMCH, CSYRK * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, REAL * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NC'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 2 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'C' IF( TRAN.AND..NOT.CONJ ) $ TRANS = 'T' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL CMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) IF( CONJ )THEN RALPHA = REAL( ALPHA ) ALPHA = CMPLX( RALPHA, RZERO ) END IF * DO 50 IB = 1, NBET BETA = BET( IB ) IF( CONJ )THEN RBETA = REAL( BETA ) BETA = CMPLX( RBETA, RZERO ) END IF NULL = N.LE.0 IF( CONJ ) $ NULL = NULL.OR.( ( K.LE.0.OR.RALPHA.EQ. $ RZERO ).AND.RBETA.EQ.RONE ) * * Generate the matrix C. * CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C, $ NMAX, CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K IF( CONJ )THEN RALS = RALPHA ELSE ALS = ALPHA END IF DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA IF( CONJ )THEN RBETS = RBETA ELSE BETS = BETA END IF DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( CONJ )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, RALPHA, LDA, RBETA, LDC IF( REWI ) $ REWIND NTRA CALL CHERK( UPLO, TRANS, N, K, RALPHA, AA, $ LDA, RBETA, CC, LDC ) ELSE IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL CSYRK( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K IF( CONJ )THEN ISAME( 5 ) = RALS.EQ.RALPHA ELSE ISAME( 5 ) = ALS.EQ.ALPHA END IF ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( CONJ )THEN ISAME( 8 ) = RBETS.EQ.RBETA ELSE ISAME( 8 ) = BETS.EQ.BETA END IF IF( NULL )THEN ISAME( 9 ) = LCE( CS, CC, LCC ) ELSE ISAME( 9 ) = LCERES( SNAME( 2: 3 ), UPLO, N, $ N, CS, CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( CONJ )THEN TRANST = 'C' ELSE TRANST = 'T' END IF JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL CMMCH( TRANST, 'N', LJ, 1, K, $ ALPHA, A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL CMMCH( 'N', TRANST, LJ, 1, K, $ ALPHA, A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( CONJ )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, RALPHA, $ LDA, RBETA, LDC ELSE WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, BETA, LDC END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1, $ '), C,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK4. * END SUBROUTINE CCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) * * Tests CHER2K and CSYR2K. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RONE, RZERO PARAMETER ( RONE = 1.0, RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ), $ ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ), $ BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ W( 2*NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BETS REAL ERR, ERRMAX, RBETA, RBETS INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB, $ K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS, $ LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS LOGICAL CONJ, NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, TRANST, UPLO, UPLOS CHARACTER*2 ICHT, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHER2K, CMAKE, CMMCH, CSYR2K * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, REAL * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NC'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 130 LCC = LDC*N * DO 120 IK = 1, NIDIM K = IDIM( IK ) * DO 110 ICT = 1, 2 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'C' IF( TRAN.AND..NOT.CONJ ) $ TRANS = 'T' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*NA * * Generate the matrix A. * IF( TRAN )THEN CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA, $ LDA, RESET, ZERO ) ELSE CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA, $ RESET, ZERO ) END IF * * Generate the matrix B. * LDB = LDA LBB = LAA IF( TRAN )THEN CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ), $ 2*NMAX, BB, LDB, RESET, ZERO ) ELSE CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ), $ NMAX, BB, LDB, RESET, ZERO ) END IF * DO 100 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 90 IA = 1, NALF ALPHA = ALF( IA ) * DO 80 IB = 1, NBET BETA = BET( IB ) IF( CONJ )THEN RBETA = REAL( BETA ) BETA = CMPLX( RBETA, RZERO ) END IF NULL = N.LE.0 IF( CONJ ) $ NULL = NULL.OR.( ( K.LE.0.OR.ALPHA.EQ. $ ZERO ).AND.RBETA.EQ.RONE ) * * Generate the matrix C. * CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C, $ NMAX, CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB IF( CONJ )THEN RBETS = RBETA ELSE BETS = BETA END IF DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( CONJ )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, RBETA, LDC IF( REWI ) $ REWIND NTRA CALL CHER2K( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BB, LDB, RBETA, CC, LDC ) ELSE IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL CSYR2K( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BB, LDB, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LCE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB IF( CONJ )THEN ISAME( 10 ) = RBETS.EQ.RBETA ELSE ISAME( 10 ) = BETS.EQ.BETA END IF IF( NULL )THEN ISAME( 11 ) = LCE( CS, CC, LCC ) ELSE ISAME( 11 ) = LCERES( 'HE', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( CONJ )THEN TRANST = 'C' ELSE TRANST = 'T' END IF JJAB = 1 JC = 1 DO 70 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN DO 50 I = 1, K W( I ) = ALPHA*AB( ( J - 1 )*2* $ NMAX + K + I ) IF( CONJ )THEN W( K + I ) = CONJG( ALPHA )* $ AB( ( J - 1 )*2* $ NMAX + I ) ELSE W( K + I ) = ALPHA* $ AB( ( J - 1 )*2* $ NMAX + I ) END IF 50 CONTINUE CALL CMMCH( TRANST, 'N', LJ, 1, 2*K, $ ONE, AB( JJAB ), 2*NMAX, W, $ 2*NMAX, BETA, C( JJ, J ), $ NMAX, CT, G, CC( JC ), LDC, $ EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE DO 60 I = 1, K IF( CONJ )THEN W( I ) = ALPHA*CONJG( AB( ( K + $ I - 1 )*NMAX + J ) ) W( K + I ) = CONJG( ALPHA* $ AB( ( I - 1 )*NMAX + $ J ) ) ELSE W( I ) = ALPHA*AB( ( K + I - 1 )* $ NMAX + J ) W( K + I ) = ALPHA* $ AB( ( I - 1 )*NMAX + $ J ) END IF 60 CONTINUE CALL CMMCH( 'N', 'N', LJ, 1, 2*K, ONE, $ AB( JJ ), NMAX, W, 2*NMAX, $ BETA, C( JJ, J ), NMAX, CT, $ G, CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 IF( TRAN ) $ JJAB = JJAB + 2*NMAX END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 140 70 CONTINUE END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( CONJ )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, RBETA, LDC ELSE WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, BETA, LDC END IF * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1, $ ', C,', I3, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, $ ',', F4.1, '), C,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK5. * END SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. * A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) * 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM * with INFOT = 9 (eca) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA, RBETA * .. Local Arrays .. COMPLEX A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CGEMM, CHEMM, CHER2K, CHERK, CHKXER, CSYMM, $ CSYR2K, CSYRK, CTRMM, CTRSM * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA, BETA, RALPHA, and RBETA. * ALPHA = CMPLX( ONE, -ONE ) BETA = CMPLX( TWO, -TWO ) RALPHA = ONE RBETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 CALL CGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL CGEMM( '/', 'C', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL CGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMM( 'C', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'N', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'C', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'C', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'C', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'T', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'N', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'C', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'C', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'C', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'T', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'N', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'C', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'C', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'C', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'T', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'C', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'C', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'T', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'N', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'C', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'T', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'C', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'C', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'C', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'C', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'T', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 20 INFOT = 1 CALL CHEMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHEMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 30 INFOT = 1 CALL CSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 40 INFOT = 1 CALL CTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 50 INFOT = 1 CALL CTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 60 INFOT = 1 CALL CHERK( '/', 'N', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHERK( 'U', 'T', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'U', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'U', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'L', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'L', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'U', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'U', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'L', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'L', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'U', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'U', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'L', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'L', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'U', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'U', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'L', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'L', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 70 INFOT = 1 CALL CSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CSYRK( 'U', 'C', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 80 INFOT = 1 CALL CHER2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHER2K( 'U', 'T', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'U', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'L', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'U', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'L', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'U', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'L', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'U', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'L', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'U', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'L', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 90 INFOT = 1 CALL CSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CSYR2K( 'U', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 100 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of CCHKE. * END SUBROUTINE CMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET, $ TRANSL ) * * Generates values for an M by N matrix A. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'HE', 'SY' or 'TR'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) COMPLEX ROGUE PARAMETER ( ROGUE = ( -1.0E10, 1.0E10 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) REAL RROGUE PARAMETER ( RROGUE = -1.0E10 ) * .. Scalar Arguments .. COMPLEX TRANSL INTEGER LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J, JJ LOGICAL GEN, HER, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. COMPLEX CBEG EXTERNAL CBEG * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, REAL * .. Executable Statements .. GEN = TYPE.EQ.'GE' HER = TYPE.EQ.'HE' SYM = TYPE.EQ.'SY' TRI = TYPE.EQ.'TR' UPPER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN A( I, J ) = CBEG( RESET ) + TRANSL IF( I.NE.J )THEN * Set some elements to zero IF( N.GT.3.AND.J.EQ.N/2 ) $ A( I, J ) = ZERO IF( HER )THEN A( J, I ) = CONJG( A( I, J ) ) ELSE IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( HER ) $ A( J, J ) = CMPLX( REAL( A( J, J ) ), RZERO ) IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 90 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 60 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 70 CONTINUE DO 80 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE IF( HER )THEN JJ = J + ( J - 1 )*LDA AA( JJ ) = CMPLX( REAL( AA( JJ ) ), RROGUE ) END IF 90 CONTINUE END IF RETURN * * End of CMAKE. * END SUBROUTINE CMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB, $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, $ NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0, RONE = 1.0 ) * .. Scalar Arguments .. COMPLEX ALPHA, BETA REAL EPS, ERR INTEGER KK, LDA, LDB, LDC, LDCC, M, N, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANSA, TRANSB * .. Array Arguments .. COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), $ CC( LDCC, * ), CT( * ) REAL G( * ) * .. Local Scalars .. COMPLEX CL REAL ERRI INTEGER I, J, K LOGICAL CTRANA, CTRANB, TRANA, TRANB * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, REAL, SQRT * .. Statement Functions .. REAL ABS1 * .. Statement Function definitions .. ABS1( CL ) = ABS( REAL( CL ) ) + ABS( AIMAG( CL ) ) * .. Executable Statements .. TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' CTRANA = TRANSA.EQ.'C' CTRANB = TRANSB.EQ.'C' * * Compute expected result, one column at a time, in CT using data * in A, B and C. * Compute gauges in G. * DO 220 J = 1, N * DO 10 I = 1, M CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE IF( .NOT.TRANA.AND..NOT.TRANB )THEN DO 30 K = 1, KK DO 20 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( K, J ) G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA.AND..NOT.TRANB )THEN IF( CTRANA )THEN DO 50 K = 1, KK DO 40 I = 1, M CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( K, J ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( K, J ) ) 40 CONTINUE 50 CONTINUE ELSE DO 70 K = 1, KK DO 60 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( K, J ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( K, J ) ) 60 CONTINUE 70 CONTINUE END IF ELSE IF( .NOT.TRANA.AND.TRANB )THEN IF( CTRANB )THEN DO 90 K = 1, KK DO 80 I = 1, M CT( I ) = CT( I ) + A( I, K )*CONJG( B( J, K ) ) G( I ) = G( I ) + ABS1( A( I, K ) )* $ ABS1( B( J, K ) ) 80 CONTINUE 90 CONTINUE ELSE DO 110 K = 1, KK DO 100 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( J, K ) G( I ) = G( I ) + ABS1( A( I, K ) )* $ ABS1( B( J, K ) ) 100 CONTINUE 110 CONTINUE END IF ELSE IF( TRANA.AND.TRANB )THEN IF( CTRANA )THEN IF( CTRANB )THEN DO 130 K = 1, KK DO 120 I = 1, M CT( I ) = CT( I ) + CONJG( A( K, I ) )* $ CONJG( B( J, K ) ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 120 CONTINUE 130 CONTINUE ELSE DO 150 K = 1, KK DO 140 I = 1, M CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( J, K ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 140 CONTINUE 150 CONTINUE END IF ELSE IF( CTRANB )THEN DO 170 K = 1, KK DO 160 I = 1, M CT( I ) = CT( I ) + A( K, I )*CONJG( B( J, K ) ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 160 CONTINUE 170 CONTINUE ELSE DO 190 K = 1, KK DO 180 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( J, K ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 180 CONTINUE 190 CONTINUE END IF END IF END IF DO 200 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) G( I ) = ABS1( ALPHA )*G( I ) + $ ABS1( BETA )*ABS1( C( I, J ) ) 200 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 210 I = 1, M ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS IF( G( I ).NE.RZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ GO TO 230 210 CONTINUE * 220 CONTINUE * * If the loop completes, all results are at least half accurate. GO TO 250 * * Report fatal error. * 230 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 240 I = 1, M IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) ELSE WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) END IF 240 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9997 )J * 250 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RE', $ 'SULT COMPUTED RESULT' ) 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) * * End of CMMCH. * END LOGICAL FUNCTION LCE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. COMPLEX RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LCE = .TRUE. GO TO 30 20 CONTINUE LCE = .FALSE. 30 RETURN * * End of LCE. * END LOGICAL FUNCTION LCERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE' or 'HE' or 'SY'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LCERES = .TRUE. GO TO 80 70 CONTINUE LCERES = .FALSE. 80 RETURN * * End of LCERES. * END COMPLEX FUNCTION CBEG( RESET ) * * Generates complex numbers as pairs of random numbers uniformly * distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, J, MI, MJ * .. Save statement .. SAVE I, IC, J, MI, MJ * .. Intrinsic Functions .. INTRINSIC CMPLX * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 MJ = 457 I = 7 J = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I or J is bounded between 1 and 999. * If initial I or J = 1,2,3,6,7 or 9, the period will be 50. * If initial I or J = 4 or 8, the period will be 25. * If initial I or J = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I or J * in 6. * IC = IC + 1 10 I = I*MI J = J*MJ I = I - 1000*( I/1000 ) J = J - 1000*( J/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF CBEG = CMPLX( ( I - 500 )/1001.0, ( J - 500 )/1001.0 ) RETURN * * End of CBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/dblat1.f000066400000000000000000001306651474157777200177450ustar00rootroot00000000000000*> \brief \b DBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM DBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the DOUBLE PRECISION Level 1 BLAS. *> *> Based upon the original BLAS test routine together with: *> F06EAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup double_blas_testing * * ===================================================================== PROGRAM DBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK0, CHECK1, CHECK2, CHECK3, HEADER * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SFAC/9.765625D-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 13 ICASE = IC CALL HEADER * * .. Initialize PASS, INCX, and INCY for a new case. .. * .. the value 9999 for INCX or INCY will appear in the .. * .. detailed output, if any, for cases that do not involve .. * .. these parameters .. * PASS = .TRUE. INCX = 9999 INCY = 9999 IF (ICASE.EQ.3 .OR. ICASE.EQ.11) THEN CALL CHECK0(SFAC) ELSE IF (ICASE.EQ.7 .OR. ICASE.EQ.8 .OR. ICASE.EQ.9 .OR. + ICASE.EQ.10) THEN CALL CHECK1(SFAC) ELSE IF (ICASE.EQ.1 .OR. ICASE.EQ.2 .OR. ICASE.EQ.5 .OR. + ICASE.EQ.6 .OR. ICASE.EQ.12 .OR. ICASE.EQ.13) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.EQ.4) THEN CALL CHECK3(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Real BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(13) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA L(1)/' DDOT '/ DATA L(2)/'DAXPY '/ DATA L(3)/'DROTG '/ DATA L(4)/' DROT '/ DATA L(5)/'DCOPY '/ DATA L(6)/'DSWAP '/ DATA L(7)/'DNRM2 '/ DATA L(8)/'DASUM '/ DATA L(9)/'DSCAL '/ DATA L(10)/'IDAMAX'/ DATA L(11)/'DROTMG'/ DATA L(12)/'DROTM '/ DATA L(13)/'DSDOT '/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK0(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SA, SB, SC, SS, D12 INTEGER I, K * .. Local Arrays .. DOUBLE PRECISION DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), $ DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. EXTERNAL DROTG, DROTMG, STEST1 * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA DA1/0.3D0, 0.4D0, -0.3D0, -0.4D0, -0.3D0, 0.0D0, + 0.0D0, 1.0D0/ DATA DB1/0.4D0, 0.3D0, 0.4D0, 0.3D0, -0.4D0, 0.0D0, + 1.0D0, 0.0D0/ DATA DC1/0.6D0, 0.8D0, -0.6D0, 0.8D0, 0.6D0, 1.0D0, + 0.0D0, 1.0D0/ DATA DS1/0.8D0, 0.6D0, 0.8D0, -0.6D0, 0.8D0, 0.0D0, + 1.0D0, 0.0D0/ DATA DATRUE/0.5D0, 0.5D0, 0.5D0, -0.5D0, -0.5D0, + 0.0D0, 1.0D0, 1.0D0/ DATA DBTRUE/0.0D0, 0.6D0, 0.0D0, -0.6D0, 0.0D0, + 0.0D0, 1.0D0, 0.0D0/ * INPUT FOR MODIFIED GIVENS DATA DAB/ .1D0,.3D0,1.2D0,.2D0, A .7D0, .2D0, .6D0, 4.2D0, B 0.D0,0.D0,0.D0,0.D0, C 4.D0, -1.D0, 2.D0, 4.D0, D 6.D-10, 2.D-2, 1.D5, 10.D0, E 4.D10, 2.D-2, 1.D-5, 10.D0, F 2.D-10, 4.D-2, 1.D5, 10.D0, G 2.D10, 4.D-2, 1.D-5, 10.D0, H 4.D0, -2.D0, 8.D0, 4.D0 / * TRUE RESULTS FOR MODIFIED GIVENS DATA DTRUE/0.D0,0.D0, 1.3D0, .2D0, 0.D0,0.D0,0.D0, .5D0, 0.D0, A 0.D0,0.D0, 4.5D0, 4.2D0, 1.D0, .5D0, 0.D0,0.D0,0.D0, B 0.D0,0.D0,0.D0,0.D0, -2.D0, 0.D0,0.D0,0.D0,0.D0, C 0.D0,0.D0,0.D0, 4.D0, -1.D0, 0.D0,0.D0,0.D0,0.D0, D 0.D0, 15.D-3, 0.D0, 10.D0, -1.D0, 0.D0, -1.D-4, E 0.D0, 1.D0, F 0.D0,0.D0, 6144.D-5, 10.D0, -1.D0, 4096.D0, -1.D6, G 0.D0, 1.D0, H 0.D0,0.D0,15.D0,10.D0,-1.D0, 5.D-5, 0.D0,1.D0,0.D0, I 0.D0,0.D0, 15.D0, 10.D0, -1. D0, 5.D5, -4096.D0, J 1.D0, 4096.D-6, K 0.D0,0.D0, 7.D0, 4.D0, 0.D0,0.D0, -.5D0, -.25D0, 0.D0/ * 4096 = 2 ** 12 DATA D12 /4096.D0/ DTRUE(1,1) = 12.D0 / 130.D0 DTRUE(2,1) = 36.D0 / 130.D0 DTRUE(7,1) = -1.D0 / 6.D0 DTRUE(1,2) = 14.D0 / 75.D0 DTRUE(2,2) = 49.D0 / 75.D0 DTRUE(9,2) = 1.D0 / 7.D0 DTRUE(1,5) = 45.D-11 * (D12 * D12) DTRUE(3,5) = 4.D5 / (3.D0 * D12) DTRUE(6,5) = 1.D0 / D12 DTRUE(8,5) = 1.D4 / (3.D0 * D12) DTRUE(1,6) = 4.D10 / (1.5D0 * D12 * D12) DTRUE(2,6) = 2.D-2 / 1.5D0 DTRUE(8,6) = 5.D-7 * D12 DTRUE(1,7) = 4.D0 / 150.D0 DTRUE(2,7) = (2.D-10 / 1.5D0) * (D12 * D12) DTRUE(7,7) = -DTRUE(6,5) DTRUE(9,7) = 1.D4 / D12 DTRUE(1,8) = DTRUE(1,7) DTRUE(2,8) = 2.D10 / (1.5D0 * D12 * D12) DTRUE(1,9) = 32.D0 / 7.D0 DTRUE(2,9) = -16.D0 / 7.D0 * .. Executable Statements .. * * Compute true values which cannot be prestored * in decimal notation * DBTRUE(1) = 1.0D0/0.6D0 DBTRUE(3) = -1.0D0/0.6D0 DBTRUE(5) = 1.0D0/0.6D0 * DO 20 K = 1, 8 * .. Set N=K for identification in output if any .. N = K IF (ICASE.EQ.3) THEN * .. DROTG .. IF (K.GT.8) GO TO 40 SA = DA1(K) SB = DB1(K) CALL DROTG(SA,SB,SC,SS) CALL STEST1(SA,DATRUE(K),DATRUE(K),SFAC) CALL STEST1(SB,DBTRUE(K),DBTRUE(K),SFAC) CALL STEST1(SC,DC1(K),DC1(K),SFAC) CALL STEST1(SS,DS1(K),DS1(K),SFAC) ELSEIF (ICASE.EQ.11) THEN * .. DROTMG .. DO I=1,4 DTEMP(I)= DAB(I,K) DTEMP(I+4) = 0.0 END DO DTEMP(9) = 0.0 CALL DROTMG(DTEMP(1),DTEMP(2),DTEMP(3),DTEMP(4),DTEMP(5)) CALL STEST(9,DTEMP,DTRUE(1,K),DTRUE(1,K),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' STOP END IF 20 CONTINUE 40 RETURN END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER I, LEN, NP1 * .. Local Arrays .. DOUBLE PRECISION DTRUE1(5), DTRUE3(5), DTRUE5(8,5,2), DV(8,5,2), + SA(10), STEMP(1), STRUE(8), SX(8) INTEGER ITRUE2(5) * .. External Functions .. DOUBLE PRECISION DASUM, DNRM2 INTEGER IDAMAX EXTERNAL DASUM, DNRM2, IDAMAX * .. External Subroutines .. EXTERNAL ITEST1, DSCAL, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SA/0.3D0, -1.0D0, 0.0D0, 1.0D0, 0.3D0, 0.3D0, + 0.3D0, 0.3D0, 0.3D0, 0.3D0/ DATA DV/0.1D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, + 2.0D0, 2.0D0, 0.3D0, 3.0D0, 3.0D0, 3.0D0, 3.0D0, + 3.0D0, 3.0D0, 3.0D0, 0.3D0, -0.4D0, 4.0D0, + 4.0D0, 4.0D0, 4.0D0, 4.0D0, 4.0D0, 0.2D0, + -0.6D0, 0.3D0, 5.0D0, 5.0D0, 5.0D0, 5.0D0, + 5.0D0, 0.1D0, -0.3D0, 0.5D0, -0.1D0, 6.0D0, + 6.0D0, 6.0D0, 6.0D0, 0.1D0, 8.0D0, 8.0D0, 8.0D0, + 8.0D0, 8.0D0, 8.0D0, 8.0D0, 0.3D0, 9.0D0, 9.0D0, + 9.0D0, 9.0D0, 9.0D0, 9.0D0, 9.0D0, 0.3D0, 2.0D0, + -0.4D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, + 0.2D0, 3.0D0, -0.6D0, 5.0D0, 0.3D0, 2.0D0, + 2.0D0, 2.0D0, 0.1D0, 4.0D0, -0.3D0, 6.0D0, + -0.5D0, 7.0D0, -0.1D0, 3.0D0/ DATA DTRUE1/0.0D0, 0.3D0, 0.5D0, 0.7D0, 0.6D0/ DATA DTRUE3/0.0D0, 0.3D0, 0.7D0, 1.1D0, 1.0D0/ DATA DTRUE5/0.10D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, + 2.0D0, 2.0D0, 2.0D0, -0.3D0, 3.0D0, 3.0D0, + 3.0D0, 3.0D0, 3.0D0, 3.0D0, 3.0D0, 0.0D0, 0.0D0, + 4.0D0, 4.0D0, 4.0D0, 4.0D0, 4.0D0, 4.0D0, + 0.20D0, -0.60D0, 0.30D0, 5.0D0, 5.0D0, 5.0D0, + 5.0D0, 5.0D0, 0.03D0, -0.09D0, 0.15D0, -0.03D0, + 6.0D0, 6.0D0, 6.0D0, 6.0D0, 0.10D0, 8.0D0, + 8.0D0, 8.0D0, 8.0D0, 8.0D0, 8.0D0, 8.0D0, + 0.09D0, 9.0D0, 9.0D0, 9.0D0, 9.0D0, 9.0D0, + 9.0D0, 9.0D0, 0.09D0, 2.0D0, -0.12D0, 2.0D0, + 2.0D0, 2.0D0, 2.0D0, 2.0D0, 0.06D0, 3.0D0, + -0.18D0, 5.0D0, 0.09D0, 2.0D0, 2.0D0, 2.0D0, + 0.03D0, 4.0D0, -0.09D0, 6.0D0, -0.15D0, 7.0D0, + -0.03D0, 3.0D0/ DATA ITRUE2/0, 1, 2, 2, 3/ * .. Executable Statements .. DO 80 INCX = 1, 2 DO 60 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN SX(I) = DV(I,NP1,INCX) 20 CONTINUE * IF (ICASE.EQ.7) THEN * .. DNRM2 .. STEMP(1) = DTRUE1(NP1) CALL STEST1(DNRM2(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. DASUM .. STEMP(1) = DTRUE3(NP1) CALL STEST1(DASUM(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. DSCAL .. CALL DSCAL(N,SA((INCX-1)*5+NP1),SX,INCX) DO 40 I = 1, LEN STRUE(I) = DTRUE5(I,NP1,INCX) 40 CONTINUE CALL STEST(LEN,SX,STRUE,STRUE,SFAC) ELSE IF (ICASE.EQ.10) THEN * .. IDAMAX .. CALL ITEST1(IDAMAX(N,SX,INCX),ITRUE2(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF 60 CONTINUE 80 CONTINUE RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SA INTEGER I, J, KI, KN, KNI, KPAR, KSIZE, LENX, LENY, $ MX, MY * .. Local Arrays .. DOUBLE PRECISION DT10X(7,4,4), DT10Y(7,4,4), DT7(4,4), $ DT8(7,4,4), DX1(7), $ DY1(7), SSIZE1(4), SSIZE2(14,2), SSIZE(7), $ STX(7), STY(7), SX(7), SY(7), $ DPAR(5,4), DT19X(7,4,16),DT19XA(7,4,4), $ DT19XB(7,4,4), DT19XC(7,4,4),DT19XD(7,4,4), $ DT19Y(7,4,16), DT19YA(7,4,4),DT19YB(7,4,4), $ DT19YC(7,4,4), DT19YD(7,4,4), DTEMP(5) REAL SX1(7), SY1(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. DOUBLE PRECISION DDOT, DSDOT EXTERNAL DDOT, DSDOT * .. External Subroutines .. EXTERNAL DAXPY, DCOPY, DROTM, DSWAP, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. EQUIVALENCE (DT19X(1,1,1),DT19XA(1,1,1)),(DT19X(1,1,5), A DT19XB(1,1,1)),(DT19X(1,1,9),DT19XC(1,1,1)), B (DT19X(1,1,13),DT19XD(1,1,1)) EQUIVALENCE (DT19Y(1,1,1),DT19YA(1,1,1)),(DT19Y(1,1,5), A DT19YB(1,1,1)),(DT19Y(1,1,9),DT19YC(1,1,1)), B (DT19Y(1,1,13),DT19YD(1,1,1)) DATA SA/0.3D0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6D0, 0.1D0, -0.5D0, 0.8D0, 0.9D0, -0.3D0, + -0.4D0/ DATA DY1/0.5D0, -0.9D0, 0.3D0, 0.7D0, -0.6D0, 0.2D0, + 0.8D0/ ***** FGVZ: We have to add separate REAL arrays for DSDOT() because ***** REAL() on an array argument does not translate via f2c. DATA SX1/0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.9E0, -0.3E0, + -0.4E0/ DATA SY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ DATA DT7/0.0D0, 0.30D0, 0.21D0, 0.62D0, 0.0D0, + 0.30D0, -0.07D0, 0.85D0, 0.0D0, 0.30D0, -0.79D0, + -0.74D0, 0.0D0, 0.30D0, 0.33D0, 1.27D0/ DATA DT8/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.68D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.68D0, -0.87D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.68D0, -0.87D0, 0.15D0, + 0.94D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.68D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.35D0, -0.9D0, 0.48D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.38D0, -0.9D0, 0.57D0, 0.7D0, -0.75D0, + 0.2D0, 0.98D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.68D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.35D0, -0.72D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.38D0, + -0.63D0, 0.15D0, 0.88D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.68D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.68D0, -0.9D0, 0.33D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.68D0, -0.9D0, 0.33D0, 0.7D0, + -0.75D0, 0.2D0, 1.04D0/ DATA DT10X/0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, -0.9D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.5D0, -0.9D0, 0.3D0, 0.7D0, + 0.0D0, 0.0D0, 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.3D0, 0.1D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.8D0, 0.1D0, -0.6D0, + 0.8D0, 0.3D0, -0.3D0, 0.5D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.9D0, + 0.1D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.7D0, + 0.1D0, 0.3D0, 0.8D0, -0.9D0, -0.3D0, 0.5D0, + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.3D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.3D0, -0.6D0, 0.8D0, 0.0D0, 0.0D0, + 0.0D0/ DATA DT10Y/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.1D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.1D0, -0.5D0, 0.8D0, 0.0D0, + 0.0D0, 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, -0.5D0, -0.9D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, -0.4D0, -0.9D0, 0.9D0, + 0.7D0, -0.5D0, 0.2D0, 0.6D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.5D0, + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + -0.4D0, 0.9D0, -0.5D0, 0.6D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, -0.9D0, 0.1D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.6D0, -0.9D0, 0.1D0, 0.7D0, + -0.5D0, 0.2D0, 0.8D0/ DATA SSIZE1/0.0D0, 0.3D0, 1.6D0, 3.2D0/ DATA SSIZE2/0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0/ * * FOR DROTM * DATA DPAR/-2.D0, 0.D0,0.D0,0.D0,0.D0, A -1.D0, 2.D0, -3.D0, -4.D0, 5.D0, B 0.D0, 0.D0, 2.D0, -3.D0, 0.D0, C 1.D0, 5.D0, 2.D0, 0.D0, -4.D0/ * TRUE X RESULTS F0R ROTATIONS DROTM DATA DT19XA/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I -.8D0, 3.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J -.9D0, 2.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K 3.5D0, -.4D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, 0.D0,0.D0,0.D0, M -.8D0, 3.8D0, -2.2D0, -1.2D0, 0.D0,0.D0,0.D0, N -.9D0, 2.8D0, -1.4D0, -1.3D0, 0.D0,0.D0,0.D0, O 3.5D0, -.4D0, -2.2D0, 4.7D0, 0.D0,0.D0,0.D0/ * DATA DT19XB/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, -.5D0, 0.D0,0.D0,0.D0,0.D0, I 0.D0, .1D0, -3.0D0, 0.D0,0.D0,0.D0,0.D0, J -.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, K 3.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, .9D0, -.3D0, -.4D0, M -2.0D0, .1D0, 1.4D0, .8D0, .6D0, -.3D0, -2.8D0, N -1.8D0, .1D0, 1.3D0, .8D0, 0.D0, -.3D0, -1.9D0, O 3.8D0, .1D0, -3.1D0, .8D0, 4.8D0, -.3D0, -1.5D0 / * DATA DT19XC/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, -.5D0, 0.D0,0.D0,0.D0,0.D0, I 4.8D0, .1D0, -3.0D0, 0.D0,0.D0,0.D0,0.D0, J 3.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, K 2.1D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, .9D0, -.3D0, -.4D0, M -1.6D0, .1D0, -2.2D0, .8D0, 5.4D0, -.3D0, -2.8D0, N -1.5D0, .1D0, -1.4D0, .8D0, 3.6D0, -.3D0, -1.9D0, O 3.7D0, .1D0, -2.2D0, .8D0, 3.6D0, -.3D0, -1.5D0 / * DATA DT19XD/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I -.8D0, -1.0D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J -.9D0, -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K 3.5D0, .8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, 0.D0,0.D0,0.D0, M -.8D0, -1.0D0, 1.4D0, -1.6D0, 0.D0,0.D0,0.D0, N -.9D0, -.8D0, 1.3D0, -1.6D0, 0.D0,0.D0,0.D0, O 3.5D0, .8D0, -3.1D0, 4.8D0, 0.D0,0.D0,0.D0/ * TRUE Y RESULTS FOR ROTATIONS DROTM DATA DT19YA/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I .7D0, -4.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J 1.7D0, -.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K -2.6D0, 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, 0.D0,0.D0,0.D0, M .7D0, -4.8D0, 3.0D0, 1.1D0, 0.D0,0.D0,0.D0, N 1.7D0, -.7D0, -.7D0, 2.3D0, 0.D0,0.D0,0.D0, O -2.6D0, 3.5D0, -.7D0, -3.6D0, 0.D0,0.D0,0.D0/ * DATA DT19YB/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, .3D0, 0.D0,0.D0,0.D0,0.D0, I 4.0D0, -.9D0, -.3D0, 0.D0,0.D0,0.D0,0.D0, J -.5D0, -.9D0, 1.5D0, 0.D0,0.D0,0.D0,0.D0, K -1.5D0, -.9D0, -1.8D0, 0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, -.6D0, .2D0, .8D0, M 3.7D0, -.9D0, -1.2D0, .7D0, -1.5D0, .2D0, 2.2D0, N -.3D0, -.9D0, 2.1D0, .7D0, -1.6D0, .2D0, 2.0D0, O -1.6D0, -.9D0, -2.1D0, .7D0, 2.9D0, .2D0, -3.8D0 / * DATA DT19YC/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I 4.0D0, -6.3D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J -.5D0, .3D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K -1.5D0, 3.0D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, 0.D0,0.D0,0.D0, M 3.7D0, -7.2D0, 3.0D0, 1.7D0, 0.D0,0.D0,0.D0, N -.3D0, .9D0, -.7D0, 1.9D0, 0.D0,0.D0,0.D0, O -1.6D0, 2.7D0, -.7D0, -3.4D0, 0.D0,0.D0,0.D0/ * DATA DT19YD/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, .3D0, 0.D0,0.D0,0.D0,0.D0, I .7D0, -.9D0, 1.2D0, 0.D0,0.D0,0.D0,0.D0, J 1.7D0, -.9D0, .5D0, 0.D0,0.D0,0.D0,0.D0, K -2.6D0, -.9D0, -1.3D0, 0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, -.6D0, .2D0, .8D0, M .7D0, -.9D0, 1.2D0, .7D0, -1.5D0, .2D0, 1.6D0, N 1.7D0, -.9D0, .5D0, .7D0, -1.6D0, .2D0, 2.4D0, O -2.6D0, -.9D0, -1.3D0, .7D0, 2.9D0, .2D0, -4.0D0 / * * .. Executable Statements .. * DO 120 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 100 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. Initialize all argument arrays .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) ***** FGVZ: We have to add a loop to initialize separate REAL arrays ***** for DSDOT() because REAL() on an array argument does not ***** translate via f2c. SX1(I) = DX1(I) SY1(I) = DY1(I) 20 CONTINUE * IF (ICASE.EQ.1) THEN * .. DDOT .. CALL STEST1(DDOT(N,SX,INCX,SY,INCY),DT7(KN,KI),SSIZE1(KN) + ,SFAC) ELSE IF (ICASE.EQ.2) THEN * .. DAXPY .. CALL DAXPY(N,SA,SX,INCX,SY,INCY) DO 40 J = 1, LENY STY(J) = DT8(J,KN,KI) 40 CONTINUE CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.5) THEN * .. DCOPY .. DO 60 I = 1, 7 STY(I) = DT10Y(I,KN,KI) 60 CONTINUE CALL DCOPY(N,SX,INCX,SY,INCY) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) ELSE IF (ICASE.EQ.6) THEN * .. DSWAP .. CALL DSWAP(N,SX,INCX,SY,INCY) DO 80 I = 1, 7 STX(I) = DT10X(I,KN,KI) STY(I) = DT10Y(I,KN,KI) 80 CONTINUE CALL STEST(LENX,SX,STX,SSIZE2(1,1),1.0D0) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) ELSE IF (ICASE.EQ.12) THEN * .. DROTM .. KNI=KN+4*(KI-1) DO KPAR=1,4 DO I=1,7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I)= DT19X(I,KPAR,KNI) STY(I)= DT19Y(I,KPAR,KNI) END DO * DO I=1,5 DTEMP(I) = DPAR(I,KPAR) END DO * DO I=1,LENX SSIZE(I)=STX(I) END DO * SEE REMARK ABOVE ABOUT DT11X(1,2,7) * AND DT11X(5,3,8). IF ((KPAR .EQ. 2) .AND. (KNI .EQ. 7)) $ SSIZE(1) = 2.4D0 IF ((KPAR .EQ. 3) .AND. (KNI .EQ. 8)) $ SSIZE(5) = 1.8D0 * CALL DROTM(N,SX,INCX,SY,INCY,DTEMP) CALL STEST(LENX,SX,STX,SSIZE,SFAC) CALL STEST(LENY,SY,STY,STY,SFAC) END DO ELSE IF (ICASE.EQ.13) THEN * .. DSDOT .. ***** CALL TESTDSDOT(REAL(DSDOT(N,REAL(SX),INCX,REAL(SY),INCY)), CALL TESTDSDOT(REAL(DSDOT(N,SX1,INCX,SY1,INCY)), $ REAL(DT7(KN,KI)),REAL(SSIZE1(KN)), .3125E-1) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF 100 CONTINUE 120 CONTINUE RETURN END SUBROUTINE CHECK3(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SC, SS INTEGER I, K, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. DOUBLE PRECISION COPYX(5), COPYY(5), DT9X(7,4,4), DT9Y(7,4,4), + DX1(7), DY1(7), MWPC(11), MWPS(11), MWPSTX(5), + MWPSTY(5), MWPTX(11,5), MWPTY(11,5), MWPX(5), + MWPY(5), SSIZE2(14,2), STX(7), STY(7), SX(7), + SY(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), MWPINX(11), + MWPINY(11), MWPN(11), NS(4) * .. External Subroutines .. EXTERNAL DROT, STEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6D0, 0.1D0, -0.5D0, 0.8D0, 0.9D0, -0.3D0, + -0.4D0/ DATA DY1/0.5D0, -0.9D0, 0.3D0, 0.7D0, -0.6D0, 0.2D0, + 0.8D0/ DATA SC, SS/0.8D0, 0.6D0/ DATA DT9X/0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.78D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.78D0, -0.46D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.78D0, -0.46D0, -0.22D0, + 1.06D0, 0.0D0, 0.0D0, 0.0D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.78D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.66D0, 0.1D0, -0.1D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.96D0, 0.1D0, -0.76D0, 0.8D0, 0.90D0, + -0.3D0, -0.02D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.78D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.06D0, 0.1D0, + -0.1D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.90D0, + 0.1D0, -0.22D0, 0.8D0, 0.18D0, -0.3D0, -0.02D0, + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.78D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.78D0, 0.26D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.78D0, 0.26D0, -0.76D0, 1.12D0, + 0.0D0, 0.0D0, 0.0D0/ DATA DT9Y/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.04D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.04D0, -0.78D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.04D0, -0.78D0, 0.54D0, + 0.08D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.04D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.7D0, + -0.9D0, -0.12D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.64D0, -0.9D0, -0.30D0, 0.7D0, -0.18D0, 0.2D0, + 0.28D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.04D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.7D0, -1.08D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.64D0, -1.26D0, + 0.54D0, 0.20D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.04D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.04D0, -0.9D0, 0.18D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.04D0, -0.9D0, 0.18D0, 0.7D0, + -0.18D0, 0.2D0, 0.16D0/ DATA SSIZE2/0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0/ * .. Executable Statements .. * DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * IF (ICASE.EQ.4) THEN * .. DROT .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I) = DT9X(I,KN,KI) STY(I) = DT9Y(I,KN,KI) 20 CONTINUE CALL DROT(N,SX,INCX,SY,INCY,SC,SS) CALL STEST(LENX,SX,STX,SSIZE2(1,KSIZE),SFAC) CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' STOP END IF 40 CONTINUE 60 CONTINUE * MWPC(1) = 1 DO 80 I = 2, 11 MWPC(I) = 0 80 CONTINUE MWPS(1) = 0 DO 100 I = 2, 6 MWPS(I) = 1 100 CONTINUE DO 120 I = 7, 11 MWPS(I) = -1 120 CONTINUE MWPINX(1) = 1 MWPINX(2) = 1 MWPINX(3) = 1 MWPINX(4) = -1 MWPINX(5) = 1 MWPINX(6) = -1 MWPINX(7) = 1 MWPINX(8) = 1 MWPINX(9) = -1 MWPINX(10) = 1 MWPINX(11) = -1 MWPINY(1) = 1 MWPINY(2) = 1 MWPINY(3) = -1 MWPINY(4) = -1 MWPINY(5) = 2 MWPINY(6) = 1 MWPINY(7) = 1 MWPINY(8) = -1 MWPINY(9) = -1 MWPINY(10) = 2 MWPINY(11) = 1 DO 140 I = 1, 11 MWPN(I) = 5 140 CONTINUE MWPN(5) = 3 MWPN(10) = 3 DO 160 I = 1, 5 MWPX(I) = I MWPY(I) = I MWPTX(1,I) = I MWPTY(1,I) = I MWPTX(2,I) = I MWPTY(2,I) = -I MWPTX(3,I) = 6 - I MWPTY(3,I) = I - 6 MWPTX(4,I) = I MWPTY(4,I) = -I MWPTX(6,I) = 6 - I MWPTY(6,I) = I - 6 MWPTX(7,I) = -I MWPTY(7,I) = I MWPTX(8,I) = I - 6 MWPTY(8,I) = 6 - I MWPTX(9,I) = -I MWPTY(9,I) = I MWPTX(11,I) = I - 6 MWPTY(11,I) = 6 - I 160 CONTINUE MWPTX(5,1) = 1 MWPTX(5,2) = 3 MWPTX(5,3) = 5 MWPTX(5,4) = 4 MWPTX(5,5) = 5 MWPTY(5,1) = -1 MWPTY(5,2) = 2 MWPTY(5,3) = -2 MWPTY(5,4) = 4 MWPTY(5,5) = -3 MWPTX(10,1) = -1 MWPTX(10,2) = -3 MWPTX(10,3) = -5 MWPTX(10,4) = 4 MWPTX(10,5) = 5 MWPTY(10,1) = 1 MWPTY(10,2) = 2 MWPTY(10,3) = 2 MWPTY(10,4) = 4 MWPTY(10,5) = 3 DO 200 I = 1, 11 INCX = MWPINX(I) INCY = MWPINY(I) DO 180 K = 1, 5 COPYX(K) = MWPX(K) COPYY(K) = MWPY(K) MWPSTX(K) = MWPTX(I,K) MWPSTY(K) = MWPTY(I,K) 180 CONTINUE CALL DROT(MWPN(I),COPYX,INCX,COPYY,INCY,MWPC(I),MWPS(I)) CALL STEST(5,COPYX,MWPSTX,MWPSTX,SFAC) CALL STEST(5,COPYY,MWPSTY,MWPSTY,SFAC) 200 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT DOUBLE PRECISION ZERO PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. DOUBLE PRECISION SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SD INTEGER I * .. External Functions .. DOUBLE PRECISION SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,2I5,I3,2D36.8,2D12.4) END SUBROUTINE TESTDSDOT(SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT REAL ZERO PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC, SCOMP, SSIZE, STRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SD * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * SD = SCOMP - STRUE IF (ABS(SFAC*SD) .LE. ABS(SSIZE) * EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, SCOMP, + STRUE, SD, SSIZE 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,1I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. DOUBLE PRECISION SCOMP1, SFAC, STRUE1 * .. Array Arguments .. DOUBLE PRECISION SSIZE(*) * .. Local Arrays .. DOUBLE PRECISION SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END DOUBLE PRECISION FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. DOUBLE PRECISION SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,2I5,2I36,I12) END blis-1.1/blastest/src/fortran/dblat2.f000066400000000000000000003333171474157777200177450ustar00rootroot00000000000000*> \brief \b DBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM DBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the DOUBLE PRECISION Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 16 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 34 lines: *> 'dblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 0.9 VALUES OF BETAC *> DGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> DGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> DSYMV T PUT F FOR NO TEST. SAME COLUMNS. *> DSBMV T PUT F FOR NO TEST. SAME COLUMNS. *> DSPMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTRMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTBMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTPMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTRSV T PUT F FOR NO TEST. SAME COLUMNS. *> DTBSV T PUT F FOR NO TEST. SAME COLUMNS. *> DTPSV T PUT F FOR NO TEST. SAME COLUMNS. *> DGER T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR T PUT F FOR NO TEST. SAME COLUMNS. *> DSPR T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. *> DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. *> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup double_blas_testing * * ===================================================================== PROGRAM DBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ G( NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LDE EXTERNAL DDIFF, LDE * .. External Subroutines .. EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, DCHK6, $ DCHKE, DMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'DGEMV ', 'DGBMV ', 'DSYMV ', 'DSBMV ', $ 'DSPMV ', 'DTRMV ', 'DTBMV ', 'DTPMV ', $ 'DTRSV ', 'DTBSV ', 'DTPSV ', 'DGER ', $ 'DSYR ', 'DSPR ', 'DSYR2 ', 'DSPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from DMVCH YT holds * the result computed by DMVCH. TRANS = 'N' CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL DCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 180, 180, $ 190, 190 )ISNUM * Test DGEMV, 01, and DGBMV, 02. 140 CALL DCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test DSYMV, 03, DSBMV, 04, and DSPMV, 05. 150 CALL DCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test DTRMV, 06, DTBMV, 07, DTPMV, 08, * DTRSV, 09, DTBSV, 10, and DTPSV, 11. 160 CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test DGER, 12. 170 CALL DCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test DSYR, 13, and DSPR, 14. 180 CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test DSYR2, 15, and DSPR2, 16. 190 CALL DCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE DOUBLE PRECISION LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 9989 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9988 FORMAT( ' FOR BETA ', 7F6.1 ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN DMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' DMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of DBLAT2. * END SUBROUTINE DCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests DGEMV and DGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DGBMV, DGEMV, DMAKE, DMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL DGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL DGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LDE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LDE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LDE( YS, YY, LY ) ELSE ISAME( 10 ) = LDERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LDE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LDE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LDE( YS, YY, LY ) ELSE ISAME( 12 ) = LDERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL DMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK1. * END SUBROUTINE DCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests DSYMV, DSBMV and DSPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DSBMV, DSPMV, DSYMV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL DSYMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL DSBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL DSPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LDE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LDE( YS, YY, LY ) ELSE ISAME( 9 ) = LDERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LDE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LDE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LDE( YS, YY, LY ) ELSE ISAME( 10 ) = LDERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( AS, AA, LAA ) ISAME( 5 ) = LDE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LDE( YS, YY, LY ) ELSE ISAME( 8 ) = LDERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL DMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', AP', $ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', A,', $ I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK2. * END SUBROUTINE DCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests DTRMV, DTBMV, DTPMV, DTRSV, DTBSV and DTPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XT( NMAX ), $ XX( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. DOUBLE PRECISION ERR, ERRMAX, TRANSL INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DTBMV, DTBSV, DTPMV, DTPSV, $ DTRMV, DTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero vector for DMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL DTPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL DTPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LDE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LDE( XS, XX, LX ) ELSE ISAME( 7 ) = LDERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LDE( XS, XX, LX ) ELSE ISAME( 8 ) = LDERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LDE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LDE( XS, XX, LX ) ELSE ISAME( 6 ) = LDERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL DMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK3. * END SUBROUTINE DCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests DGER. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL NULL, RESET, SAME * .. Local Arrays .. DOUBLE PRECISION W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DGER, DMAKE, DMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL DGER( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LDE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LDE( AS, AA, LAA ) ELSE ISAME( 8 ) = LDERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF CALL DMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), F4.1, ', X,', I2, $ ', Y,', I2, ', A,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK4. * END SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests DSYR and DSPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. DOUBLE PRECISION W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DSPR, DSYR * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL DSYR( UPLO, N, ALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX IF( REWI ) $ REWIND NTRA CALL DSPR( UPLO, N, ALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LDE( AS, AA, LAA ) ELSE ISAME( 6 ) = LDERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = Z( J ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL DMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK5. * END SUBROUTINE DCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests DSYR2 and DSPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. DOUBLE PRECISION W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DSPR2, DSYR2 * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL DSYR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL DSPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LDE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LDE( AS, AA, LAA ) ELSE ISAME( 8 ) = LDERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = Z( J, 2 ) W( 2 ) = Z( J, 1 ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL DMVCH( 'N', LJ, 2, ALPHA, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK6. * END SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. DOUBLE PRECISION A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. EXTERNAL CHKXER, DGBMV, DGEMV, DGER, DSBMV, DSPMV, DSPR, $ DSPR2, DSYMV, DSYR, DSYR2, DTBMV, DTBSV, DTPMV, $ DTPSV, DTRMV, DTRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, $ 160 )ISNUM 10 INFOT = 1 CALL DGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 20 INFOT = 1 CALL DGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 30 INFOT = 1 CALL DSYMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSYMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 40 INFOT = 1 CALL DSBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DSBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DSBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DSBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 50 INFOT = 1 CALL DSPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DSPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 60 INFOT = 1 CALL DTRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DTRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 70 INFOT = 1 CALL DTBMV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 80 INFOT = 1 CALL DTPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 90 INFOT = 1 CALL DTRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRSV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DTRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 100 INFOT = 1 CALL DTBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 110 INFOT = 1 CALL DTPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 120 INFOT = 1 CALL DGER( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGER( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGER( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DGER( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DGER( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 130 INFOT = 1 CALL DSYR( '/', 0, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYR( 'U', -1, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSYR( 'U', 0, ALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR( 'U', 2, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 140 INFOT = 1 CALL DSPR( '/', 0, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSPR( 'U', -1, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSPR( 'U', 0, ALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 150 INFOT = 1 CALL DSYR2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYR2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSYR2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 160 INFOT = 1 CALL DSPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 170 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of DCHKE. * END SUBROUTINE DMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) DOUBLE PRECISION ROGUE PARAMETER ( ROGUE = -1.0D10 ) * .. Scalar Arguments .. DOUBLE PRECISION TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. DOUBLE PRECISION DBEG EXTERNAL DBEG * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'S' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. $ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = DBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE 130 CONTINUE ELSE IF( TYPE.EQ.'SB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE 170 CONTINUE ELSE IF( TYPE.EQ.'SP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of DMAKE. * END SUBROUTINE DMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA, EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. DOUBLE PRECISION A( NMAX, * ), G( * ), X( * ), Y( * ), YT( * ), $ YY( * ) * .. Local Scalars .. DOUBLE PRECISION ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL TRAN * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. * IY = KY DO 30 I = 1, ML YT( IY ) = ZERO G( IY ) = ZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS( A( J, I )*X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE DO 20 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS( A( I, J )*X( JX ) ) JX = JX + INCXL 20 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS( ALPHA )*G( IY ) + ABS( BETA*Y( IY ) ) IY = IY + INCYL 30 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 40 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 50 40 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 70 * * Report fatal error. * 50 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 60 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT( I ) END IF 60 CONTINUE * 70 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) * * End of DMVCH. * END LOGICAL FUNCTION LDE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. DOUBLE PRECISION RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LDE = .TRUE. GO TO 30 20 CONTINUE LDE = .FALSE. 30 RETURN * * End of LDE. * END LOGICAL FUNCTION LDERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'SY' or 'SP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LDERES = .TRUE. GO TO 80 70 CONTINUE LDERES = .FALSE. 80 RETURN * * End of LDERES. * END DOUBLE PRECISION FUNCTION DBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Intrinsic Functions .. INTRINSIC DBLE * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF DBEG = DBLE( I - 500 )/1001.0D0 RETURN * * End of DBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/dblat3.f000066400000000000000000003135061474157777200177440ustar00rootroot00000000000000*> \brief \b DBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM DBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the DOUBLE PRECISION Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 6 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 20 lines: *> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 1.3 VALUES OF BETA *> DGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> DSYMM T PUT F FOR NO TEST. SAME COLUMNS. *> DTRMM T PUT F FOR NO TEST. SAME COLUMNS. *> DTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> DSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup double_blas_testing * * ===================================================================== PROGRAM DBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. DOUBLE PRECISION AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LDE EXTERNAL DDIFF, LDE * .. External Subroutines .. EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, DCHKE, DMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'DGEMM ', 'DSYMM ', 'DTRMM ', 'DTRSM ', $ 'DSYRK ', 'DSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from DMMCH CT holds * the result computed by DMMCH. TRANSA = 'N' TRANSB = 'N' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'T' TRANSB = 'N' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL DCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 160, 160, 170, 180 )ISNUM * Test DGEMM, 01. 140 CALL DCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test DSYMM, 02. 150 CALL DCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test DTRMM, 03, DTRSM, 04. 160 CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test DSYRK, 05. 170 CALL DCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test DSYR2K, 06. 180 CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE DOUBLE PRECISION LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9992 FORMAT( ' FOR BETA ', 7F6.1 ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN DMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' DMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of DBLAT3. * END SUBROUTINE DCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests DGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DGEMM, DMAKE, DMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL DMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL DMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL DGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LDE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LDE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LDE( CS, CC, LCC ) ELSE ISAME( 12 ) = LDERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL DMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', $ 'C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK1. * END SUBROUTINE DCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests DSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL DMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the symmetric matrix A. * CALL DMAKE( 'SY', UPLO, ' ', NA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL DSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LDE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LDE( CS, CC, LCC ) ELSE ISAME( 11 ) = LDERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL DMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL DMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK2. * END SUBROUTINE DCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests DTRMM and DTRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DTRMM, DTRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero matrix for DMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL DMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL DMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL DTRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL DTRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LDE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LDE( BS, BB, LBB ) ELSE ISAME( 10 ) = LDERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL DMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL DMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. * DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL DMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL DMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK3. * END SUBROUTINE DCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests DSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DSYRK * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL DMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA BETS = BETA DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL DSYRK( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LDE( CS, CC, LCC ) ELSE ISAME( 9 ) = LDERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL DMMCH( 'T', 'N', LJ, 1, K, ALPHA, $ A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL DMMCH( 'N', 'T', LJ, 1, K, ALPHA, $ A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK4. * END SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) * * Tests DSYR2K. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ), $ ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ), $ BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB, $ K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS, $ LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DSYR2K * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 130 LCC = LDC*N NULL = N.LE.0 * DO 120 IK = 1, NIDIM K = IDIM( IK ) * DO 110 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*NA * * Generate the matrix A. * IF( TRAN )THEN CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA, $ LDA, RESET, ZERO ) ELSE CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA, $ RESET, ZERO ) END IF * * Generate the matrix B. * LDB = LDA LBB = LAA IF( TRAN )THEN CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ), $ 2*NMAX, BB, LDB, RESET, ZERO ) ELSE CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ), $ NMAX, BB, LDB, RESET, ZERO ) END IF * DO 100 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 90 IA = 1, NALF ALPHA = ALF( IA ) * DO 80 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BETS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL DSYR2K( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LDE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LDE( CS, CC, LCC ) ELSE ISAME( 11 ) = LDERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JJAB = 1 JC = 1 DO 70 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN DO 50 I = 1, K W( I ) = AB( ( J - 1 )*2*NMAX + K + $ I ) W( K + I ) = AB( ( J - 1 )*2*NMAX + $ I ) 50 CONTINUE CALL DMMCH( 'T', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJAB ), 2*NMAX, $ W, 2*NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE DO 60 I = 1, K W( I ) = AB( ( K + I - 1 )*NMAX + $ J ) W( K + I ) = AB( ( I - 1 )*NMAX + $ J ) 60 CONTINUE CALL DMMCH( 'N', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJ ), NMAX, W, $ 2*NMAX, BETA, C( JJ, J ), $ NMAX, CT, G, CC( JC ), LDC, $ EPS, ERR, FATAL, NOUT, $ .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 IF( TRAN ) $ JJAB = JJAB + 2*NMAX END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 140 70 CONTINUE END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, BETA, LDC * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK5. * END SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. * A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * 3-19-92: Initialize ALPHA and BETA (eca) * 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. DOUBLE PRECISION A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CHKXER, DGEMM, DSYMM, DSYR2K, DSYRK, DTRMM, $ DTRSM * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA and BETA. * ALPHA = ONE BETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL DGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 20 INFOT = 1 CALL DSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 30 INFOT = 1 CALL DTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 40 INFOT = 1 CALL DTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 50 INFOT = 1 CALL DSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYRK( 'U', '/', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 60 INFOT = 1 CALL DSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYR2K( 'U', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 70 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of DCHKE. * END SUBROUTINE DMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET, $ TRANSL ) * * Generates values for an M by N matrix A. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'SY' or 'TR'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) DOUBLE PRECISION ROGUE PARAMETER ( ROGUE = -1.0D10 ) * .. Scalar Arguments .. DOUBLE PRECISION TRANSL INTEGER LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. DOUBLE PRECISION DBEG EXTERNAL DBEG * .. Executable Statements .. GEN = TYPE.EQ.'GE' SYM = TYPE.EQ.'SY' TRI = TYPE.EQ.'TR' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN A( I, J ) = DBEG( RESET ) + TRANSL IF( I.NE.J )THEN * Set some elements to zero IF( N.GT.3.AND.J.EQ.N/2 ) $ A( I, J ) = ZERO IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 90 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 60 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 70 CONTINUE DO 80 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE END IF RETURN * * End of DMAKE. * END SUBROUTINE DMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB, $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, $ NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA, EPS, ERR INTEGER KK, LDA, LDB, LDC, LDCC, M, N, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANSA, TRANSB * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), $ CC( LDCC, * ), CT( * ), G( * ) * .. Local Scalars .. DOUBLE PRECISION ERRI INTEGER I, J, K LOGICAL TRANA, TRANB * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * * Compute expected result, one column at a time, in CT using data * in A, B and C. * Compute gauges in G. * DO 120 J = 1, N * DO 10 I = 1, M CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE IF( .NOT.TRANA.AND..NOT.TRANB )THEN DO 30 K = 1, KK DO 20 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( K, J ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( K, J ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA.AND..NOT.TRANB )THEN DO 50 K = 1, KK DO 40 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( K, J ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( K, J ) ) 40 CONTINUE 50 CONTINUE ELSE IF( .NOT.TRANA.AND.TRANB )THEN DO 70 K = 1, KK DO 60 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( J, K ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( J, K ) ) 60 CONTINUE 70 CONTINUE ELSE IF( TRANA.AND.TRANB )THEN DO 90 K = 1, KK DO 80 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( J, K ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( J, K ) ) 80 CONTINUE 90 CONTINUE END IF DO 100 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( I, J ) ) 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 110 I = 1, M ERRI = ABS( CT( I ) - CC( I, J ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 130 110 CONTINUE * 120 CONTINUE * * If the loop completes, all results are at least half accurate. GO TO 150 * * Report fatal error. * 130 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 140 I = 1, M IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) ELSE WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) END IF 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9997 )J * 150 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) * * End of DMMCH. * END LOGICAL FUNCTION LDE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. DOUBLE PRECISION RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LDE = .TRUE. GO TO 30 20 CONTINUE LDE = .FALSE. 30 RETURN * * End of LDE. * END LOGICAL FUNCTION LDERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE' or 'SY'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LDERES = .TRUE. GO TO 80 70 CONTINUE LDERES = .FALSE. 80 RETURN * * End of LDERES. * END DOUBLE PRECISION FUNCTION DBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF DBEG = ( I - 500 )/1001.0D0 RETURN * * End of DBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/run-f2c.sh000077500000000000000000000064751474157777200202430ustar00rootroot00000000000000#!/bin/bash # This script converts netlib [sdcz]blat[123].f files from Fortran to C. # # Start by converting to C with f2c. # Options used: # -A Produce ANSI C (instead of old-style C). # -R Do not promote REAL functions and operations to DOUBLE PRECISION. # -a Make local variables automatic rather than static (unless they # appear in a DATA, EQUIVALENCE, NAMELIST, or SAVE statement). f2c -A -R -a *.f # Add 'const' qualifier to certain function delcarations so they match # the prototypes taken from libf2c. recursive-sed.sh -c "s/s_cmp(char \*, char/s_cmp(const char \*, const char/g" -p "*.c" recursive-sed.sh -c "s/s_copy(char \*, char/s_copy(char \*, const char/g" -p "*.c" recursive-sed.sh -c "s/d_cnjg(doublecomplex \*, doublecomplex/d_cnjg(doublecomplex *, const doublecomplex/g" -p "*.c" recursive-sed.sh -c "s/d_imag(doublecomplex/d_imag(const doublecomplex/g" -p "*.c" recursive-sed.sh -c "s/c_abs(complex/c_abs(const complex/g" -p "*.c" recursive-sed.sh -c "s/z_abs(doublecomplex/c_abs(const doublecomplex/g" -p "*.c" # Use main() and 'void' instead of MAIN__ and VOID. recursive-sed.sh -c "s/MAIN__/main/g" -p "*.c" recursive-sed.sh -c "s/VOID/void/g" -p "*.c" # Add prefix to calls to epsilon_() based on the file in which the # function is called. [sd]_epsilon_() are not libf2c functions, but # they are present in the local subset of libf2c used to link the # BLAS testsuite drivers. recursive-sed.sh -c "s/epsilon_/s_epsilon_/g" -p "[sc]*.c" recursive-sed.sh -c "s/epsilon_/d_epsilon_/g" -p "[dz]*.c" # The dsdot_() check needs s_epsilon_(), not d_epsilon_(). recursive-sed.sh -c "s/real d_epsilon_()/real s_epsilon_()/g" -p "d*1.c" recursive-sed.sh -c "s/d_epsilon_(\&c_b81)/s_epsilon_(\&c_b81)/g" -p "d*1.c" # Fix type inconsistencies in the original Fortran file vis-a-vis # epsilon() and abs(). recursive-sed.sh -c "s/real d_epsilon_(doublereal/double d_epsilon_(doublereal/g" -p "[dz]*.c" recursive-sed.sh -c "s/c_abs/z_abs/g" -p "z*.c" # Fix missing braces around struct initializers. recursive-sed.sh -c "s/equiv_3 = {/equiv_3 = {{/g" -p "[sd]*1.c" recursive-sed.sh -c "s/equiv_7 = {/equiv_7 = {{/g" -p "[sd]*1.c" recursive-sed.sh -c "s/0., 0., 0. }/0., 0., 0. }}/g" -p "d*1.c" recursive-sed.sh -c "s/2.9, .2, -4. }/2.9, .2, -4. }}/g" -p "d*1.c" recursive-sed.sh -c "s/0.f, 0.f, 0.f }/0.f, 0.f, 0.f }}/g" -p "s*1.c" recursive-sed.sh -c "s/-4.f };/-4.f }};/g" -p "s*1.c" # Convert from brain-dead f2c complex calling conventions to normal # return-based conventions. subst1='\n#ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL\n&\n#else\n' subst2='\n#endif\n' recursive-sed.sh -c "s/ void cdotc_(complex \*,/${subst1}complex cdotc_(${subst2}/g" -p "c*1.c" recursive-sed.sh -c "s/ void cdotu_(complex \*,/${subst1}complex cdotu_(${subst2}/g" -p "c*1.c" recursive-sed.sh -c "s/\(.*\)cdotc_(&q__1,/${subst1}\1q__1 = cdotc_(${subst2}\1/g" -p "c*1.c" recursive-sed.sh -c "s/\(.*\)cdotu_(&q__1,/${subst1}\1q__1 = cdotu_(${subst2}\1/g" -p "c*1.c" recursive-sed.sh -c "s/ void zdotc_(doublecomplex \*,/${subst1}doublecomplex zdotc_(${subst2}/g" -p "z*1.c" recursive-sed.sh -c "s/ void zdotu_(doublecomplex \*,/${subst1}doublecomplex zdotu_(${subst2}/g" -p "z*1.c" recursive-sed.sh -c "s/\(.*\)zdotc_(\&z__1,/${subst1}\1z__1 = zdotc_(${subst2}\1/g" -p "z*1.c" recursive-sed.sh -c "s/\(.*\)zdotu_(\&z__1,/${subst1}\1z__1 = zdotu_(${subst2}\1/g" -p "z*1.c" blis-1.1/blastest/src/fortran/sblat1.f000066400000000000000000001245741474157777200177660ustar00rootroot00000000000000*> \brief \b SBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM SBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the REAL Level 1 BLAS. *> *> Based upon the original BLAS test routine together with: *> F06EAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup single_blas_testing * * ===================================================================== PROGRAM SBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK0, CHECK1, CHECK2, CHECK3, HEADER * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SFAC/9.765625E-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 13 ICASE = IC CALL HEADER * * .. Initialize PASS, INCX, and INCY for a new case. .. * .. the value 9999 for INCX or INCY will appear in the .. * .. detailed output, if any, for cases that do not involve .. * .. these parameters .. * PASS = .TRUE. INCX = 9999 INCY = 9999 IF (ICASE.EQ.3 .OR. ICASE.EQ.11) THEN CALL CHECK0(SFAC) ELSE IF (ICASE.EQ.7 .OR. ICASE.EQ.8 .OR. ICASE.EQ.9 .OR. + ICASE.EQ.10) THEN CALL CHECK1(SFAC) ELSE IF (ICASE.EQ.1 .OR. ICASE.EQ.2 .OR. ICASE.EQ.5 .OR. + ICASE.EQ.6 .OR. ICASE.EQ.12 .OR. ICASE.EQ.13) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.EQ.4) THEN CALL CHECK3(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Real BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(13) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA L(1)/' SDOT '/ DATA L(2)/'SAXPY '/ DATA L(3)/'SROTG '/ DATA L(4)/' SROT '/ DATA L(5)/'SCOPY '/ DATA L(6)/'SSWAP '/ DATA L(7)/'SNRM2 '/ DATA L(8)/'SASUM '/ DATA L(9)/'SSCAL '/ DATA L(10)/'ISAMAX'/ DATA L(11)/'SROTMG'/ DATA L(12)/'SROTM '/ DATA L(13)/'SDSDOT'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK0(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL D12, SA, SB, SC, SS INTEGER I, K * .. Local Arrays .. REAL DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), + DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. EXTERNAL SROTG, SROTMG, STEST1 * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA DA1/0.3E0, 0.4E0, -0.3E0, -0.4E0, -0.3E0, 0.0E0, + 0.0E0, 1.0E0/ DATA DB1/0.4E0, 0.3E0, 0.4E0, 0.3E0, -0.4E0, 0.0E0, + 1.0E0, 0.0E0/ DATA DC1/0.6E0, 0.8E0, -0.6E0, 0.8E0, 0.6E0, 1.0E0, + 0.0E0, 1.0E0/ DATA DS1/0.8E0, 0.6E0, 0.8E0, -0.6E0, 0.8E0, 0.0E0, + 1.0E0, 0.0E0/ DATA DATRUE/0.5E0, 0.5E0, 0.5E0, -0.5E0, -0.5E0, + 0.0E0, 1.0E0, 1.0E0/ DATA DBTRUE/0.0E0, 0.6E0, 0.0E0, -0.6E0, 0.0E0, + 0.0E0, 1.0E0, 0.0E0/ * INPUT FOR MODIFIED GIVENS DATA DAB/ .1E0,.3E0,1.2E0,.2E0, A .7E0, .2E0, .6E0, 4.2E0, B 0.E0,0.E0,0.E0,0.E0, C 4.E0, -1.E0, 2.E0, 4.E0, D 6.E-10, 2.E-2, 1.E5, 10.E0, E 4.E10, 2.E-2, 1.E-5, 10.E0, F 2.E-10, 4.E-2, 1.E5, 10.E0, G 2.E10, 4.E-2, 1.E-5, 10.E0, H 4.E0, -2.E0, 8.E0, 4.E0 / * TRUE RESULTS FOR MODIFIED GIVENS DATA DTRUE/0.E0,0.E0, 1.3E0, .2E0, 0.E0,0.E0,0.E0, .5E0, 0.E0, A 0.E0,0.E0, 4.5E0, 4.2E0, 1.E0, .5E0, 0.E0,0.E0,0.E0, B 0.E0,0.E0,0.E0,0.E0, -2.E0, 0.E0,0.E0,0.E0,0.E0, C 0.E0,0.E0,0.E0, 4.E0, -1.E0, 0.E0,0.E0,0.E0,0.E0, D 0.E0, 15.E-3, 0.E0, 10.E0, -1.E0, 0.E0, -1.E-4, E 0.E0, 1.E0, F 0.E0,0.E0, 6144.E-5, 10.E0, -1.E0, 4096.E0, -1.E6, G 0.E0, 1.E0, H 0.E0,0.E0,15.E0,10.E0,-1.E0, 5.E-5, 0.E0,1.E0,0.E0, I 0.E0,0.E0, 15.E0, 10.E0, -1. E0, 5.E5, -4096.E0, J 1.E0, 4096.E-6, K 0.E0,0.E0, 7.E0, 4.E0, 0.E0,0.E0, -.5E0, -.25E0, 0.E0/ * 4096 = 2 ** 12 DATA D12 /4096.E0/ DTRUE(1,1) = 12.E0 / 130.E0 DTRUE(2,1) = 36.E0 / 130.E0 DTRUE(7,1) = -1.E0 / 6.E0 DTRUE(1,2) = 14.E0 / 75.E0 DTRUE(2,2) = 49.E0 / 75.E0 DTRUE(9,2) = 1.E0 / 7.E0 DTRUE(1,5) = 45.E-11 * (D12 * D12) DTRUE(3,5) = 4.E5 / (3.E0 * D12) DTRUE(6,5) = 1.E0 / D12 DTRUE(8,5) = 1.E4 / (3.E0 * D12) DTRUE(1,6) = 4.E10 / (1.5E0 * D12 * D12) DTRUE(2,6) = 2.E-2 / 1.5E0 DTRUE(8,6) = 5.E-7 * D12 DTRUE(1,7) = 4.E0 / 150.E0 DTRUE(2,7) = (2.E-10 / 1.5E0) * (D12 * D12) DTRUE(7,7) = -DTRUE(6,5) DTRUE(9,7) = 1.E4 / D12 DTRUE(1,8) = DTRUE(1,7) DTRUE(2,8) = 2.E10 / (1.5E0 * D12 * D12) DTRUE(1,9) = 32.E0 / 7.E0 DTRUE(2,9) = -16.E0 / 7.E0 * .. Executable Statements .. * * Compute true values which cannot be prestored * in decimal notation * DBTRUE(1) = 1.0E0/0.6E0 DBTRUE(3) = -1.0E0/0.6E0 DBTRUE(5) = 1.0E0/0.6E0 * DO 20 K = 1, 8 * .. Set N=K for identification in output if any .. N = K IF (ICASE.EQ.3) THEN * .. SROTG .. IF (K.GT.8) GO TO 40 SA = DA1(K) SB = DB1(K) CALL SROTG(SA,SB,SC,SS) CALL STEST1(SA,DATRUE(K),DATRUE(K),SFAC) CALL STEST1(SB,DBTRUE(K),DBTRUE(K),SFAC) CALL STEST1(SC,DC1(K),DC1(K),SFAC) CALL STEST1(SS,DS1(K),DS1(K),SFAC) ELSEIF (ICASE.EQ.11) THEN * .. SROTMG .. DO I=1,4 DTEMP(I)= DAB(I,K) DTEMP(I+4) = 0.0 END DO DTEMP(9) = 0.0 CALL SROTMG(DTEMP(1),DTEMP(2),DTEMP(3),DTEMP(4),DTEMP(5)) CALL STEST(9,DTEMP,DTRUE(1,K),DTRUE(1,K),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' STOP END IF 20 CONTINUE 40 RETURN END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER I, LEN, NP1 * .. Local Arrays .. REAL DTRUE1(5), DTRUE3(5), DTRUE5(8,5,2), DV(8,5,2), + SA(10), STEMP(1), STRUE(8), SX(8) INTEGER ITRUE2(5) * .. External Functions .. REAL SASUM, SNRM2 INTEGER ISAMAX EXTERNAL SASUM, SNRM2, ISAMAX * .. External Subroutines .. EXTERNAL ITEST1, SSCAL, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SA/0.3E0, -1.0E0, 0.0E0, 1.0E0, 0.3E0, 0.3E0, + 0.3E0, 0.3E0, 0.3E0, 0.3E0/ DATA DV/0.1E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, + 2.0E0, 2.0E0, 0.3E0, 3.0E0, 3.0E0, 3.0E0, 3.0E0, + 3.0E0, 3.0E0, 3.0E0, 0.3E0, -0.4E0, 4.0E0, + 4.0E0, 4.0E0, 4.0E0, 4.0E0, 4.0E0, 0.2E0, + -0.6E0, 0.3E0, 5.0E0, 5.0E0, 5.0E0, 5.0E0, + 5.0E0, 0.1E0, -0.3E0, 0.5E0, -0.1E0, 6.0E0, + 6.0E0, 6.0E0, 6.0E0, 0.1E0, 8.0E0, 8.0E0, 8.0E0, + 8.0E0, 8.0E0, 8.0E0, 8.0E0, 0.3E0, 9.0E0, 9.0E0, + 9.0E0, 9.0E0, 9.0E0, 9.0E0, 9.0E0, 0.3E0, 2.0E0, + -0.4E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, + 0.2E0, 3.0E0, -0.6E0, 5.0E0, 0.3E0, 2.0E0, + 2.0E0, 2.0E0, 0.1E0, 4.0E0, -0.3E0, 6.0E0, + -0.5E0, 7.0E0, -0.1E0, 3.0E0/ DATA DTRUE1/0.0E0, 0.3E0, 0.5E0, 0.7E0, 0.6E0/ DATA DTRUE3/0.0E0, 0.3E0, 0.7E0, 1.1E0, 1.0E0/ DATA DTRUE5/0.10E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, + 2.0E0, 2.0E0, 2.0E0, -0.3E0, 3.0E0, 3.0E0, + 3.0E0, 3.0E0, 3.0E0, 3.0E0, 3.0E0, 0.0E0, 0.0E0, + 4.0E0, 4.0E0, 4.0E0, 4.0E0, 4.0E0, 4.0E0, + 0.20E0, -0.60E0, 0.30E0, 5.0E0, 5.0E0, 5.0E0, + 5.0E0, 5.0E0, 0.03E0, -0.09E0, 0.15E0, -0.03E0, + 6.0E0, 6.0E0, 6.0E0, 6.0E0, 0.10E0, 8.0E0, + 8.0E0, 8.0E0, 8.0E0, 8.0E0, 8.0E0, 8.0E0, + 0.09E0, 9.0E0, 9.0E0, 9.0E0, 9.0E0, 9.0E0, + 9.0E0, 9.0E0, 0.09E0, 2.0E0, -0.12E0, 2.0E0, + 2.0E0, 2.0E0, 2.0E0, 2.0E0, 0.06E0, 3.0E0, + -0.18E0, 5.0E0, 0.09E0, 2.0E0, 2.0E0, 2.0E0, + 0.03E0, 4.0E0, -0.09E0, 6.0E0, -0.15E0, 7.0E0, + -0.03E0, 3.0E0/ DATA ITRUE2/0, 1, 2, 2, 3/ * .. Executable Statements .. DO 80 INCX = 1, 2 DO 60 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN SX(I) = DV(I,NP1,INCX) 20 CONTINUE * IF (ICASE.EQ.7) THEN * .. SNRM2 .. STEMP(1) = DTRUE1(NP1) CALL STEST1(SNRM2(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. SASUM .. STEMP(1) = DTRUE3(NP1) CALL STEST1(SASUM(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. SSCAL .. CALL SSCAL(N,SA((INCX-1)*5+NP1),SX,INCX) DO 40 I = 1, LEN STRUE(I) = DTRUE5(I,NP1,INCX) 40 CONTINUE CALL STEST(LEN,SX,STRUE,STRUE,SFAC) ELSE IF (ICASE.EQ.10) THEN * .. ISAMAX .. CALL ITEST1(ISAMAX(N,SX,INCX),ITRUE2(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF 60 CONTINUE 80 CONTINUE RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SA INTEGER I, J, KI, KN, KNI, KPAR, KSIZE, LENX, LENY, $ MX, MY * .. Local Arrays .. REAL DT10X(7,4,4), DT10Y(7,4,4), DT7(4,4), $ DT8(7,4,4), DX1(7), $ DY1(7), SSIZE1(4), SSIZE2(14,2), SSIZE3(4), $ SSIZE(7), STX(7), STY(7), SX(7), SY(7), $ DPAR(5,4), DT19X(7,4,16),DT19XA(7,4,4), $ DT19XB(7,4,4), DT19XC(7,4,4),DT19XD(7,4,4), $ DT19Y(7,4,16), DT19YA(7,4,4),DT19YB(7,4,4), $ DT19YC(7,4,4), DT19YD(7,4,4), DTEMP(5), $ ST7B(4,4) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. REAL SDOT, SDSDOT EXTERNAL SDOT, SDSDOT * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SROTM, SSWAP, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. EQUIVALENCE (DT19X(1,1,1),DT19XA(1,1,1)),(DT19X(1,1,5), A DT19XB(1,1,1)),(DT19X(1,1,9),DT19XC(1,1,1)), B (DT19X(1,1,13),DT19XD(1,1,1)) EQUIVALENCE (DT19Y(1,1,1),DT19YA(1,1,1)),(DT19Y(1,1,5), A DT19YB(1,1,1)),(DT19Y(1,1,9),DT19YC(1,1,1)), B (DT19Y(1,1,13),DT19YD(1,1,1)) DATA SA/0.3E0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.9E0, -0.3E0, + -0.4E0/ DATA DY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ DATA DT7/0.0E0, 0.30E0, 0.21E0, 0.62E0, 0.0E0, + 0.30E0, -0.07E0, 0.85E0, 0.0E0, 0.30E0, -0.79E0, + -0.74E0, 0.0E0, 0.30E0, 0.33E0, 1.27E0/ DATA ST7B/ .1, .4, .31, .72, .1, .4, .03, .95, + .1, .4, -.69, -.64, .1, .4, .43, 1.37/ DATA DT8/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.68E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.68E0, -0.87E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.68E0, -0.87E0, 0.15E0, + 0.94E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.68E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.35E0, -0.9E0, 0.48E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.38E0, -0.9E0, 0.57E0, 0.7E0, -0.75E0, + 0.2E0, 0.98E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.68E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.35E0, -0.72E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.38E0, + -0.63E0, 0.15E0, 0.88E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.68E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.68E0, -0.9E0, 0.33E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.68E0, -0.9E0, 0.33E0, 0.7E0, + -0.75E0, 0.2E0, 1.04E0/ DATA DT10X/0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, -0.9E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.5E0, -0.9E0, 0.3E0, 0.7E0, + 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.3E0, 0.1E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.8E0, 0.1E0, -0.6E0, + 0.8E0, 0.3E0, -0.3E0, 0.5E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.9E0, + 0.1E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.7E0, + 0.1E0, 0.3E0, 0.8E0, -0.9E0, -0.3E0, 0.5E0, + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.3E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.3E0, -0.6E0, 0.8E0, 0.0E0, 0.0E0, + 0.0E0/ DATA DT10Y/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.1E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.0E0, + 0.0E0, 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, -0.5E0, -0.9E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, -0.4E0, -0.9E0, 0.9E0, + 0.7E0, -0.5E0, 0.2E0, 0.6E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.5E0, + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + -0.4E0, 0.9E0, -0.5E0, 0.6E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, -0.9E0, 0.1E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.6E0, -0.9E0, 0.1E0, 0.7E0, + -0.5E0, 0.2E0, 0.8E0/ DATA SSIZE1/0.0E0, 0.3E0, 1.6E0, 3.2E0/ DATA SSIZE2/0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0/ DATA SSIZE3/ .1, .4, 1.7, 3.3 / * * FOR DROTM * DATA DPAR/-2.E0, 0.E0,0.E0,0.E0,0.E0, A -1.E0, 2.E0, -3.E0, -4.E0, 5.E0, B 0.E0, 0.E0, 2.E0, -3.E0, 0.E0, C 1.E0, 5.E0, 2.E0, 0.E0, -4.E0/ * TRUE X RESULTS F0R ROTATIONS DROTM DATA DT19XA/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I -.8E0, 3.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J -.9E0, 2.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K 3.5E0, -.4E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, 0.E0,0.E0,0.E0, M -.8E0, 3.8E0, -2.2E0, -1.2E0, 0.E0,0.E0,0.E0, N -.9E0, 2.8E0, -1.4E0, -1.3E0, 0.E0,0.E0,0.E0, O 3.5E0, -.4E0, -2.2E0, 4.7E0, 0.E0,0.E0,0.E0/ * DATA DT19XB/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, -.5E0, 0.E0,0.E0,0.E0,0.E0, I 0.E0, .1E0, -3.0E0, 0.E0,0.E0,0.E0,0.E0, J -.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, K 3.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, .9E0, -.3E0, -.4E0, M -2.0E0, .1E0, 1.4E0, .8E0, .6E0, -.3E0, -2.8E0, N -1.8E0, .1E0, 1.3E0, .8E0, 0.E0, -.3E0, -1.9E0, O 3.8E0, .1E0, -3.1E0, .8E0, 4.8E0, -.3E0, -1.5E0 / * DATA DT19XC/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, -.5E0, 0.E0,0.E0,0.E0,0.E0, I 4.8E0, .1E0, -3.0E0, 0.E0,0.E0,0.E0,0.E0, J 3.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, K 2.1E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, .9E0, -.3E0, -.4E0, M -1.6E0, .1E0, -2.2E0, .8E0, 5.4E0, -.3E0, -2.8E0, N -1.5E0, .1E0, -1.4E0, .8E0, 3.6E0, -.3E0, -1.9E0, O 3.7E0, .1E0, -2.2E0, .8E0, 3.6E0, -.3E0, -1.5E0 / * DATA DT19XD/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I -.8E0, -1.0E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J -.9E0, -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K 3.5E0, .8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, 0.E0,0.E0,0.E0, M -.8E0, -1.0E0, 1.4E0, -1.6E0, 0.E0,0.E0,0.E0, N -.9E0, -.8E0, 1.3E0, -1.6E0, 0.E0,0.E0,0.E0, O 3.5E0, .8E0, -3.1E0, 4.8E0, 0.E0,0.E0,0.E0/ * TRUE Y RESULTS FOR ROTATIONS DROTM DATA DT19YA/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I .7E0, -4.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J 1.7E0, -.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K -2.6E0, 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, 0.E0,0.E0,0.E0, M .7E0, -4.8E0, 3.0E0, 1.1E0, 0.E0,0.E0,0.E0, N 1.7E0, -.7E0, -.7E0, 2.3E0, 0.E0,0.E0,0.E0, O -2.6E0, 3.5E0, -.7E0, -3.6E0, 0.E0,0.E0,0.E0/ * DATA DT19YB/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, .3E0, 0.E0,0.E0,0.E0,0.E0, I 4.0E0, -.9E0, -.3E0, 0.E0,0.E0,0.E0,0.E0, J -.5E0, -.9E0, 1.5E0, 0.E0,0.E0,0.E0,0.E0, K -1.5E0, -.9E0, -1.8E0, 0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, -.6E0, .2E0, .8E0, M 3.7E0, -.9E0, -1.2E0, .7E0, -1.5E0, .2E0, 2.2E0, N -.3E0, -.9E0, 2.1E0, .7E0, -1.6E0, .2E0, 2.0E0, O -1.6E0, -.9E0, -2.1E0, .7E0, 2.9E0, .2E0, -3.8E0 / * DATA DT19YC/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I 4.0E0, -6.3E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J -.5E0, .3E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K -1.5E0, 3.0E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, 0.E0,0.E0,0.E0, M 3.7E0, -7.2E0, 3.0E0, 1.7E0, 0.E0,0.E0,0.E0, N -.3E0, .9E0, -.7E0, 1.9E0, 0.E0,0.E0,0.E0, O -1.6E0, 2.7E0, -.7E0, -3.4E0, 0.E0,0.E0,0.E0/ * DATA DT19YD/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, .3E0, 0.E0,0.E0,0.E0,0.E0, I .7E0, -.9E0, 1.2E0, 0.E0,0.E0,0.E0,0.E0, J 1.7E0, -.9E0, .5E0, 0.E0,0.E0,0.E0,0.E0, K -2.6E0, -.9E0, -1.3E0, 0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, -.6E0, .2E0, .8E0, M .7E0, -.9E0, 1.2E0, .7E0, -1.5E0, .2E0, 1.6E0, N 1.7E0, -.9E0, .5E0, .7E0, -1.6E0, .2E0, 2.4E0, O -2.6E0, -.9E0, -1.3E0, .7E0, 2.9E0, .2E0, -4.0E0 / * * .. Executable Statements .. * DO 120 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 100 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. Initialize all argument arrays .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) 20 CONTINUE * IF (ICASE.EQ.1) THEN * .. SDOT .. CALL STEST1(SDOT(N,SX,INCX,SY,INCY),DT7(KN,KI),SSIZE1(KN) + ,SFAC) ELSE IF (ICASE.EQ.2) THEN * .. SAXPY .. CALL SAXPY(N,SA,SX,INCX,SY,INCY) DO 40 J = 1, LENY STY(J) = DT8(J,KN,KI) 40 CONTINUE CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.5) THEN * .. SCOPY .. DO 60 I = 1, 7 STY(I) = DT10Y(I,KN,KI) 60 CONTINUE CALL SCOPY(N,SX,INCX,SY,INCY) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) ELSE IF (ICASE.EQ.6) THEN * .. SSWAP .. CALL SSWAP(N,SX,INCX,SY,INCY) DO 80 I = 1, 7 STX(I) = DT10X(I,KN,KI) STY(I) = DT10Y(I,KN,KI) 80 CONTINUE CALL STEST(LENX,SX,STX,SSIZE2(1,1),1.0E0) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) ELSEIF (ICASE.EQ.12) THEN * .. SROTM .. KNI=KN+4*(KI-1) DO KPAR=1,4 DO I=1,7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I)= DT19X(I,KPAR,KNI) STY(I)= DT19Y(I,KPAR,KNI) END DO * DO I=1,5 DTEMP(I) = DPAR(I,KPAR) END DO * DO I=1,LENX SSIZE(I)=STX(I) END DO * SEE REMARK ABOVE ABOUT DT11X(1,2,7) * AND DT11X(5,3,8). IF ((KPAR .EQ. 2) .AND. (KNI .EQ. 7)) $ SSIZE(1) = 2.4E0 IF ((KPAR .EQ. 3) .AND. (KNI .EQ. 8)) $ SSIZE(5) = 1.8E0 * CALL SROTM(N,SX,INCX,SY,INCY,DTEMP) CALL STEST(LENX,SX,STX,SSIZE,SFAC) CALL STEST(LENY,SY,STY,STY,SFAC) END DO ELSEIF (ICASE.EQ.13) THEN * .. SDSROT .. CALL STEST1 (SDSDOT(N,.1,SX,INCX,SY,INCY), $ ST7B(KN,KI),SSIZE3(KN),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF 100 CONTINUE 120 CONTINUE RETURN END SUBROUTINE CHECK3(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SC, SS INTEGER I, K, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. REAL COPYX(5), COPYY(5), DT9X(7,4,4), DT9Y(7,4,4), + DX1(7), DY1(7), MWPC(11), MWPS(11), MWPSTX(5), + MWPSTY(5), MWPTX(11,5), MWPTY(11,5), MWPX(5), + MWPY(5), SSIZE2(14,2), STX(7), STY(7), SX(7), + SY(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), MWPINX(11), + MWPINY(11), MWPN(11), NS(4) * .. External Subroutines .. EXTERNAL SROT, STEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.9E0, -0.3E0, + -0.4E0/ DATA DY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ DATA SC, SS/0.8E0, 0.6E0/ DATA DT9X/0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.78E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.78E0, -0.46E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.78E0, -0.46E0, -0.22E0, + 1.06E0, 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.78E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.66E0, 0.1E0, -0.1E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.96E0, 0.1E0, -0.76E0, 0.8E0, 0.90E0, + -0.3E0, -0.02E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.78E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.06E0, 0.1E0, + -0.1E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.90E0, + 0.1E0, -0.22E0, 0.8E0, 0.18E0, -0.3E0, -0.02E0, + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.78E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.78E0, 0.26E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.78E0, 0.26E0, -0.76E0, 1.12E0, + 0.0E0, 0.0E0, 0.0E0/ DATA DT9Y/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.04E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.04E0, -0.78E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.04E0, -0.78E0, 0.54E0, + 0.08E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.04E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.7E0, + -0.9E0, -0.12E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.64E0, -0.9E0, -0.30E0, 0.7E0, -0.18E0, 0.2E0, + 0.28E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.04E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.7E0, -1.08E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.64E0, -1.26E0, + 0.54E0, 0.20E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.04E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.04E0, -0.9E0, 0.18E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.04E0, -0.9E0, 0.18E0, 0.7E0, + -0.18E0, 0.2E0, 0.16E0/ DATA SSIZE2/0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0/ * .. Executable Statements .. * DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * IF (ICASE.EQ.4) THEN * .. SROT .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I) = DT9X(I,KN,KI) STY(I) = DT9Y(I,KN,KI) 20 CONTINUE CALL SROT(N,SX,INCX,SY,INCY,SC,SS) CALL STEST(LENX,SX,STX,SSIZE2(1,KSIZE),SFAC) CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' STOP END IF 40 CONTINUE 60 CONTINUE * MWPC(1) = 1 DO 80 I = 2, 11 MWPC(I) = 0 80 CONTINUE MWPS(1) = 0 DO 100 I = 2, 6 MWPS(I) = 1 100 CONTINUE DO 120 I = 7, 11 MWPS(I) = -1 120 CONTINUE MWPINX(1) = 1 MWPINX(2) = 1 MWPINX(3) = 1 MWPINX(4) = -1 MWPINX(5) = 1 MWPINX(6) = -1 MWPINX(7) = 1 MWPINX(8) = 1 MWPINX(9) = -1 MWPINX(10) = 1 MWPINX(11) = -1 MWPINY(1) = 1 MWPINY(2) = 1 MWPINY(3) = -1 MWPINY(4) = -1 MWPINY(5) = 2 MWPINY(6) = 1 MWPINY(7) = 1 MWPINY(8) = -1 MWPINY(9) = -1 MWPINY(10) = 2 MWPINY(11) = 1 DO 140 I = 1, 11 MWPN(I) = 5 140 CONTINUE MWPN(5) = 3 MWPN(10) = 3 DO 160 I = 1, 5 MWPX(I) = I MWPY(I) = I MWPTX(1,I) = I MWPTY(1,I) = I MWPTX(2,I) = I MWPTY(2,I) = -I MWPTX(3,I) = 6 - I MWPTY(3,I) = I - 6 MWPTX(4,I) = I MWPTY(4,I) = -I MWPTX(6,I) = 6 - I MWPTY(6,I) = I - 6 MWPTX(7,I) = -I MWPTY(7,I) = I MWPTX(8,I) = I - 6 MWPTY(8,I) = 6 - I MWPTX(9,I) = -I MWPTY(9,I) = I MWPTX(11,I) = I - 6 MWPTY(11,I) = 6 - I 160 CONTINUE MWPTX(5,1) = 1 MWPTX(5,2) = 3 MWPTX(5,3) = 5 MWPTX(5,4) = 4 MWPTX(5,5) = 5 MWPTY(5,1) = -1 MWPTY(5,2) = 2 MWPTY(5,3) = -2 MWPTY(5,4) = 4 MWPTY(5,5) = -3 MWPTX(10,1) = -1 MWPTX(10,2) = -3 MWPTX(10,3) = -5 MWPTX(10,4) = 4 MWPTX(10,5) = 5 MWPTY(10,1) = 1 MWPTY(10,2) = 2 MWPTY(10,3) = 2 MWPTY(10,4) = 4 MWPTY(10,5) = 3 DO 200 I = 1, 11 INCX = MWPINX(I) INCY = MWPINY(I) DO 180 K = 1, 5 COPYX(K) = MWPX(K) COPYY(K) = MWPY(K) MWPSTX(K) = MWPTX(I,K) MWPSTY(K) = MWPTY(I,K) 180 CONTINUE CALL SROT(MWPN(I),COPYX,INCX,COPYY,INCY,MWPC(I),MWPS(I)) CALL STEST(5,COPYX,MWPSTX,MWPSTX,SFAC) CALL STEST(5,COPYY,MWPSTY,MWPSTY,SFAC) 200 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT REAL ZERO PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. REAL SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SD INTEGER I * .. External Functions .. REAL SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,2I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. REAL SCOMP1, SFAC, STRUE1 * .. Array Arguments .. REAL SSIZE(*) * .. Local Arrays .. REAL SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END REAL FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. REAL SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,2I5,2I36,I12) END blis-1.1/blastest/src/fortran/sblat2.f000066400000000000000000003331731474157777200177640ustar00rootroot00000000000000*> \brief \b SBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM SBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the REAL Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 16 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 34 lines: *> 'sblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 0.9 VALUES OF BETA *> SGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> SGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> SSYMV T PUT F FOR NO TEST. SAME COLUMNS. *> SSBMV T PUT F FOR NO TEST. SAME COLUMNS. *> SSPMV T PUT F FOR NO TEST. SAME COLUMNS. *> STRMV T PUT F FOR NO TEST. SAME COLUMNS. *> STBMV T PUT F FOR NO TEST. SAME COLUMNS. *> STPMV T PUT F FOR NO TEST. SAME COLUMNS. *> STRSV T PUT F FOR NO TEST. SAME COLUMNS. *> STBSV T PUT F FOR NO TEST. SAME COLUMNS. *> STPSV T PUT F FOR NO TEST. SAME COLUMNS. *> SGER T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR T PUT F FOR NO TEST. SAME COLUMNS. *> SSPR T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. *> SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. *> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup single_blas_testing * * ===================================================================== PROGRAM SBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ G( NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LSE EXTERNAL SDIFF, LSE * .. External Subroutines .. EXTERNAL SCHK1, SCHK2, SCHK3, SCHK4, SCHK5, SCHK6, $ SCHKE, SMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'SGEMV ', 'SGBMV ', 'SSYMV ', 'SSBMV ', $ 'SSPMV ', 'STRMV ', 'STBMV ', 'STPMV ', $ 'STRSV ', 'STBSV ', 'STPSV ', 'SGER ', $ 'SSYR ', 'SSPR ', 'SSYR2 ', 'SSPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from SMVCH YT holds * the result computed by SMVCH. TRANS = 'N' CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL SCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 180, 180, $ 190, 190 )ISNUM * Test SGEMV, 01, and SGBMV, 02. 140 CALL SCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test SSYMV, 03, SSBMV, 04, and SSPMV, 05. 150 CALL SCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test STRMV, 06, STBMV, 07, STPMV, 08, * STRSV, 09, STBSV, 10, and STPSV, 11. 160 CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test SGER, 12. 170 CALL SCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test SSYR, 13, and SSPR, 14. 180 CALL SCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test SSYR2, 15, and SSPR2, 16. 190 CALL SCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE REAL LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 9989 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9988 FORMAT( ' FOR BETA ', 7F6.1 ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN SMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' SMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of SBLAT2. * END SUBROUTINE SCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests SGEMV and SGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF PARAMETER ( ZERO = 0.0, HALF = 0.5 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SGBMV, SGEMV, SMAKE, SMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL SGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL SGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LSE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LSE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LSE( YS, YY, LY ) ELSE ISAME( 10 ) = LSERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LSE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LSE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LSE( YS, YY, LY ) ELSE ISAME( 12 ) = LSERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL SMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK1. * END SUBROUTINE SCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests SSYMV, SSBMV and SSPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF PARAMETER ( ZERO = 0.0, HALF = 0.5 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, SSBMV, SSPMV, SSYMV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL SSYMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL SSBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL SSPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LSE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LSE( YS, YY, LY ) ELSE ISAME( 9 ) = LSERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LSE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LSE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LSE( YS, YY, LY ) ELSE ISAME( 10 ) = LSERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( AS, AA, LAA ) ISAME( 5 ) = LSE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LSE( YS, YY, LY ) ELSE ISAME( 8 ) = LSERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL SMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', AP', $ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', A,', $ I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK2. * END SUBROUTINE SCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XT( NMAX ), $ XX( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. REAL ERR, ERRMAX, TRANSL INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, STBMV, STBSV, STPMV, STPSV, $ STRMV, STRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero vector for SMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL STPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL STPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LSE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LSE( XS, XX, LX ) ELSE ISAME( 7 ) = LSERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LSE( XS, XX, LX ) ELSE ISAME( 8 ) = LSERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LSE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LSE( XS, XX, LX ) ELSE ISAME( 6 ) = LSERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL SMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK3. * END SUBROUTINE SCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests SGER. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL NULL, RESET, SAME * .. Local Arrays .. REAL W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SGER, SMAKE, SMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL SGER( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LSE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LSE( AS, AA, LAA ) ELSE ISAME( 8 ) = LSERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF CALL SMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), F4.1, ', X,', I2, $ ', Y,', I2, ', A,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK4. * END SUBROUTINE SCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests SSYR and SSPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. REAL W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, SSPR, SSYR * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL SSYR( UPLO, N, ALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX IF( REWI ) $ REWIND NTRA CALL SSPR( UPLO, N, ALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LSE( AS, AA, LAA ) ELSE ISAME( 6 ) = LSERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = Z( J ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL SMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK5. * END SUBROUTINE SCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests SSYR2 and SSPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. REAL W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, SSPR2, SSYR2 * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL SSYR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL SSPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LSE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LSE( AS, AA, LAA ) ELSE ISAME( 8 ) = LSERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = Z( J, 2 ) W( 2 ) = Z( J, 1 ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL SMVCH( 'N', LJ, 2, ALPHA, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK6. * END SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. REAL A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. EXTERNAL CHKXER, SGBMV, SGEMV, SGER, SSBMV, SSPMV, SSPR, $ SSPR2, SSYMV, SSYR, SSYR2, STBMV, STBSV, STPMV, $ STPSV, STRMV, STRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, $ 160 )ISNUM 10 INFOT = 1 CALL SGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL SGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL SGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 20 INFOT = 1 CALL SGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 30 INFOT = 1 CALL SSYMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSYMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 40 INFOT = 1 CALL SSBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL SSBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SSBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL SSBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 50 INFOT = 1 CALL SSPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL SSPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 60 INFOT = 1 CALL STRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL STRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 70 INFOT = 1 CALL STBMV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 80 INFOT = 1 CALL STPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 90 INFOT = 1 CALL STRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRSV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL STRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 100 INFOT = 1 CALL STBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 110 INFOT = 1 CALL STPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 120 INFOT = 1 CALL SGER( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGER( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGER( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SGER( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SGER( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 130 INFOT = 1 CALL SSYR( '/', 0, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYR( 'U', -1, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSYR( 'U', 0, ALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR( 'U', 2, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 140 INFOT = 1 CALL SSPR( '/', 0, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSPR( 'U', -1, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSPR( 'U', 0, ALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 150 INFOT = 1 CALL SSYR2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYR2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSYR2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 160 INFOT = 1 CALL SSPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 170 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of SCHKE. * END SUBROUTINE SMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) REAL ROGUE PARAMETER ( ROGUE = -1.0E10 ) * .. Scalar Arguments .. REAL TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. REAL SBEG EXTERNAL SBEG * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'S' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. $ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = SBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE 130 CONTINUE ELSE IF( TYPE.EQ.'SB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE 170 CONTINUE ELSE IF( TYPE.EQ.'SP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of SMAKE. * END SUBROUTINE SMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) * .. Scalar Arguments .. REAL ALPHA, BETA, EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. REAL A( NMAX, * ), G( * ), X( * ), Y( * ), YT( * ), $ YY( * ) * .. Local Scalars .. REAL ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL TRAN * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. * IY = KY DO 30 I = 1, ML YT( IY ) = ZERO G( IY ) = ZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS( A( J, I )*X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE DO 20 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS( A( I, J )*X( JX ) ) JX = JX + INCXL 20 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS( ALPHA )*G( IY ) + ABS( BETA*Y( IY ) ) IY = IY + INCYL 30 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 40 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 50 40 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 70 * * Report fatal error. * 50 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 60 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT(I) END IF 60 CONTINUE * 70 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) * * End of SMVCH. * END LOGICAL FUNCTION LSE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. REAL RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LSE = .TRUE. GO TO 30 20 CONTINUE LSE = .FALSE. 30 RETURN * * End of LSE. * END LOGICAL FUNCTION LSERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'SY' or 'SP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LSERES = .TRUE. GO TO 80 70 CONTINUE LSERES = .FALSE. 80 RETURN * * End of LSERES. * END REAL FUNCTION SBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Intrinsic Functions .. INTRINSIC REAL * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF SBEG = REAL( I - 500 )/1001.0 RETURN * * End of SBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/sblat3.f000066400000000000000000003133541474157777200177640ustar00rootroot00000000000000*> \brief \b SBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM SBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the REAL Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 6 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 20 lines: *> 'sblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 1.3 VALUES OF BETA *> SGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> SSYMM T PUT F FOR NO TEST. SAME COLUMNS. *> STRMM T PUT F FOR NO TEST. SAME COLUMNS. *> STRSM T PUT F FOR NO TEST. SAME COLUMNS. *> SSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup single_blas_testing * * ===================================================================== PROGRAM SBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. REAL AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LSE EXTERNAL SDIFF, LSE * .. External Subroutines .. EXTERNAL SCHK1, SCHK2, SCHK3, SCHK4, SCHK5, SCHKE, SMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'SGEMM ', 'SSYMM ', 'STRMM ', 'STRSM ', $ 'SSYRK ', 'SSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from SMMCH CT holds * the result computed by SMMCH. TRANSA = 'N' TRANSB = 'N' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'T' TRANSB = 'N' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL SCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 160, 160, 170, 180 )ISNUM * Test SGEMM, 01. 140 CALL SCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test SSYMM, 02. 150 CALL SCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test STRMM, 03, STRSM, 04. 160 CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test SSYRK, 05. 170 CALL SCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test SSYR2K, 06. 180 CALL SCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE REAL LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9992 FORMAT( ' FOR BETA ', 7F6.1 ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN SMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' SMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of SBLAT3. * END SUBROUTINE SCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests SGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SGEMM, SMAKE, SMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL SMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL SMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL SGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LSE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LSE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LSE( CS, CC, LCC ) ELSE ISAME( 12 ) = LSERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL SMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', $ 'C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK1. * END SUBROUTINE SCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests SSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, SSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL SMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the symmetric matrix A. * CALL SMAKE( 'SY', UPLO, ' ', NA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL SSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LSE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LSE( CS, CC, LCC ) ELSE ISAME( 11 ) = LSERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL SMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL SMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK2. * END SUBROUTINE SCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests STRMM and STRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, STRMM, STRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero matrix for SMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL SMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL SMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL STRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL STRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LSE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LSE( BS, BB, LBB ) ELSE ISAME( 10 ) = LSERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL SMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL SMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. * DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL SMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL SMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK3. * END SUBROUTINE SCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests SSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, SSYRK * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL SMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA BETS = BETA DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL SSYRK( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LSE( CS, CC, LCC ) ELSE ISAME( 9 ) = LSERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL SMMCH( 'T', 'N', LJ, 1, K, ALPHA, $ A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL SMMCH( 'N', 'T', LJ, 1, K, ALPHA, $ A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK4. * END SUBROUTINE SCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) * * Tests SSYR2K. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ), $ ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ), $ BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB, $ K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS, $ LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, SSYR2K * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 130 LCC = LDC*N NULL = N.LE.0 * DO 120 IK = 1, NIDIM K = IDIM( IK ) * DO 110 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*NA * * Generate the matrix A. * IF( TRAN )THEN CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA, $ LDA, RESET, ZERO ) ELSE CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA, $ RESET, ZERO ) END IF * * Generate the matrix B. * LDB = LDA LBB = LAA IF( TRAN )THEN CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ), $ 2*NMAX, BB, LDB, RESET, ZERO ) ELSE CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ), $ NMAX, BB, LDB, RESET, ZERO ) END IF * DO 100 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 90 IA = 1, NALF ALPHA = ALF( IA ) * DO 80 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BETS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL SSYR2K( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LSE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LSE( CS, CC, LCC ) ELSE ISAME( 11 ) = LSERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JJAB = 1 JC = 1 DO 70 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN DO 50 I = 1, K W( I ) = AB( ( J - 1 )*2*NMAX + K + $ I ) W( K + I ) = AB( ( J - 1 )*2*NMAX + $ I ) 50 CONTINUE CALL SMMCH( 'T', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJAB ), 2*NMAX, $ W, 2*NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE DO 60 I = 1, K W( I ) = AB( ( K + I - 1 )*NMAX + $ J ) W( K + I ) = AB( ( I - 1 )*NMAX + $ J ) 60 CONTINUE CALL SMMCH( 'N', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJ ), NMAX, W, $ 2*NMAX, BETA, C( JJ, J ), $ NMAX, CT, G, CC( JC ), LDC, $ EPS, ERR, FATAL, NOUT, $ .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 IF( TRAN ) $ JJAB = JJAB + 2*NMAX END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 140 70 CONTINUE END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, BETA, LDC * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK5. * END SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. * A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * 3-19-92: Initialize ALPHA and BETA (eca) * 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. REAL A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CHKXER, SGEMM, SSYMM, SSYR2K, SSYRK, STRMM, $ STRSM * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA and BETA. * ALPHA = ONE BETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL SGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 20 INFOT = 1 CALL SSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 30 INFOT = 1 CALL STRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 40 INFOT = 1 CALL STRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 50 INFOT = 1 CALL SSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYRK( 'U', '/', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 60 INFOT = 1 CALL SSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYR2K( 'U', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 70 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of SCHKE. * END SUBROUTINE SMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET, $ TRANSL ) * * Generates values for an M by N matrix A. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'SY' or 'TR'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) REAL ROGUE PARAMETER ( ROGUE = -1.0E10 ) * .. Scalar Arguments .. REAL TRANSL INTEGER LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. REAL SBEG EXTERNAL SBEG * .. Executable Statements .. GEN = TYPE.EQ.'GE' SYM = TYPE.EQ.'SY' TRI = TYPE.EQ.'TR' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN A( I, J ) = SBEG( RESET ) + TRANSL IF( I.NE.J )THEN * Set some elements to zero IF( N.GT.3.AND.J.EQ.N/2 ) $ A( I, J ) = ZERO IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 90 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 60 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 70 CONTINUE DO 80 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE END IF RETURN * * End of SMAKE. * END SUBROUTINE SMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB, $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, $ NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) * .. Scalar Arguments .. REAL ALPHA, BETA, EPS, ERR INTEGER KK, LDA, LDB, LDC, LDCC, M, N, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANSA, TRANSB * .. Array Arguments .. REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), $ CC( LDCC, * ), CT( * ), G( * ) * .. Local Scalars .. REAL ERRI INTEGER I, J, K LOGICAL TRANA, TRANB * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * * Compute expected result, one column at a time, in CT using data * in A, B and C. * Compute gauges in G. * DO 120 J = 1, N * DO 10 I = 1, M CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE IF( .NOT.TRANA.AND..NOT.TRANB )THEN DO 30 K = 1, KK DO 20 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( K, J ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( K, J ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA.AND..NOT.TRANB )THEN DO 50 K = 1, KK DO 40 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( K, J ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( K, J ) ) 40 CONTINUE 50 CONTINUE ELSE IF( .NOT.TRANA.AND.TRANB )THEN DO 70 K = 1, KK DO 60 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( J, K ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( J, K ) ) 60 CONTINUE 70 CONTINUE ELSE IF( TRANA.AND.TRANB )THEN DO 90 K = 1, KK DO 80 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( J, K ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( J, K ) ) 80 CONTINUE 90 CONTINUE END IF DO 100 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( I, J ) ) 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 110 I = 1, M ERRI = ABS( CT( I ) - CC( I, J ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 130 110 CONTINUE * 120 CONTINUE * * If the loop completes, all results are at least half accurate. GO TO 150 * * Report fatal error. * 130 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 140 I = 1, M IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) ELSE WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) END IF 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9997 )J * 150 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) * * End of SMMCH. * END LOGICAL FUNCTION LSE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. REAL RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LSE = .TRUE. GO TO 30 20 CONTINUE LSE = .FALSE. 30 RETURN * * End of LSE. * END LOGICAL FUNCTION LSERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE' or 'SY'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LSERES = .TRUE. GO TO 80 70 CONTINUE LSERES = .FALSE. 80 RETURN * * End of LSERES. * END REAL FUNCTION SBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF SBEG = ( I - 500 )/1001.0 RETURN * * End of SBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/zblat1.f000066400000000000000000000765621474157777200200000ustar00rootroot00000000000000*> \brief \b ZBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM ZBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX*16 Level 1 BLAS. *> *> Based upon the original BLAS test routine together with: *> F06GAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex16_blas_testing * * ===================================================================== PROGRAM ZBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK1, CHECK2, HEADER * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SFAC/9.765625D-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 10 ICASE = IC CALL HEADER * * Initialize PASS, INCX, INCY, and MODE for a new case. * The value 9999 for INCX, INCY or MODE will appear in the * detailed output, if any, for cases that do not involve * these parameters. * PASS = .TRUE. INCX = 9999 INCY = 9999 MODE = 9999 IF (ICASE.LE.5) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.GE.6) THEN CALL CHECK1(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Complex BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(10) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA L(1)/'ZDOTC '/ DATA L(2)/'ZDOTU '/ DATA L(3)/'ZAXPY '/ DATA L(4)/'ZCOPY '/ DATA L(5)/'ZSWAP '/ DATA L(6)/'DZNRM2'/ DATA L(7)/'DZASUM'/ DATA L(8)/'ZSCAL '/ DATA L(9)/'ZDSCAL'/ DATA L(10)/'IZAMAX'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX*16 CA DOUBLE PRECISION SA INTEGER I, J, LEN, NP1 * .. Local Arrays .. COMPLEX*16 CTRUE5(8,5,2), CTRUE6(8,5,2), CV(8,5,2), CX(8), + MWPCS(5), MWPCT(5) DOUBLE PRECISION STRUE2(5), STRUE4(5) INTEGER ITRUE3(5) * .. External Functions .. DOUBLE PRECISION DZASUM, DZNRM2 INTEGER IZAMAX EXTERNAL DZASUM, DZNRM2, IZAMAX * .. External Subroutines .. EXTERNAL ZSCAL, ZDSCAL, CTEST, ITEST1, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SA, CA/0.3D0, (0.4D0,-0.7D0)/ DATA ((CV(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (0.3D0,-0.4D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (0.1D0,-0.3D0), (0.5D0,-0.1D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0), + (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0), + (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (0.3D0,-0.4D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (0.1D0,-0.3D0), (8.0D0,9.0D0), (0.5D0,-0.1D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (0.1D0,0.1D0), + (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0), + (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0), + (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0), + (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/ DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/ DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.6D0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (-0.16D0,-0.37D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (-0.17D0,-0.19D0), (0.13D0,-0.39D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (0.11D0,-0.03D0), (-0.17D0,0.46D0), + (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (0.19D0,-0.17D0), (0.20D0,-0.35D0), + (0.35D0,0.20D0), (0.14D0,0.08D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0), + (2.0D0,3.0D0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (-0.16D0,-0.37D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (-0.17D0,-0.19D0), (8.0D0,9.0D0), + (0.13D0,-0.39D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (0.11D0,-0.03D0), (3.0D0,6.0D0), + (-0.17D0,0.46D0), (4.0D0,7.0D0), + (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0), + (0.20D0,-0.35D0), (6.0D0,9.0D0), + (0.35D0,0.20D0), (8.0D0,3.0D0), + (0.14D0,0.08D0), (9.0D0,4.0D0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (0.09D0,-0.12D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (0.03D0,-0.09D0), (0.15D0,-0.03D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (0.03D0,0.03D0), (-0.18D0,0.03D0), + (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (0.09D0,0.03D0), (0.15D0,0.00D0), + (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (0.09D0,-0.12D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (0.03D0,-0.09D0), (8.0D0,9.0D0), + (0.15D0,-0.03D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (0.03D0,0.03D0), (3.0D0,6.0D0), + (-0.18D0,0.03D0), (4.0D0,7.0D0), + (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0), + (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0), + (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 DO 40 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN CX(I) = CV(I,NP1,INCX) 20 CONTINUE IF (ICASE.EQ.6) THEN * .. DZNRM2 .. CALL STEST1(DZNRM2(N,CX,INCX),STRUE2(NP1),STRUE2(NP1), + SFAC) ELSE IF (ICASE.EQ.7) THEN * .. DZASUM .. CALL STEST1(DZASUM(N,CX,INCX),STRUE4(NP1),STRUE4(NP1), + SFAC) ELSE IF (ICASE.EQ.8) THEN * .. ZSCAL .. CALL ZSCAL(N,CA,CX,INCX) CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.9) THEN * .. ZDSCAL .. CALL ZDSCAL(N,SA,CX,INCX) CALL CTEST(LEN,CX,CTRUE6(1,NP1,INCX),CTRUE6(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.10) THEN * .. IZAMAX .. CALL ITEST1(IZAMAX(N,CX,INCX),ITRUE3(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF * 40 CONTINUE 60 CONTINUE * INCX = 1 IF (ICASE.EQ.8) THEN * ZSCAL * Add a test for alpha equal to zero. CA = (0.0D0,0.0D0) DO 80 I = 1, 5 MWPCT(I) = (0.0D0,0.0D0) MWPCS(I) = (1.0D0,1.0D0) 80 CONTINUE CALL ZSCAL(5,CA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) ELSE IF (ICASE.EQ.9) THEN * ZDSCAL * Add a test for alpha equal to zero. SA = 0.0D0 DO 100 I = 1, 5 MWPCT(I) = (0.0D0,0.0D0) MWPCS(I) = (1.0D0,1.0D0) 100 CONTINUE CALL ZDSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to one. SA = 1.0D0 DO 120 I = 1, 5 MWPCT(I) = CX(I) MWPCS(I) = CX(I) 120 CONTINUE CALL ZDSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to minus one. SA = -1.0D0 DO 140 I = 1, 5 MWPCT(I) = -CX(I) MWPCS(I) = -CX(I) 140 CONTINUE CALL ZDSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) END IF RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX*16 CA INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. COMPLEX*16 CDOT(1), CSIZE1(4), CSIZE2(7,2), CSIZE3(14), + CT10X(7,4,4), CT10Y(7,4,4), CT6(4,4), CT7(4,4), + CT8(7,4,4), CX(7), CX1(7), CY(7), CY1(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. COMPLEX*16 ZDOTC, ZDOTU EXTERNAL ZDOTC, ZDOTU * .. External Subroutines .. EXTERNAL ZAXPY, ZCOPY, ZSWAP, CTEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA CA/(0.4D0,-0.7D0)/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA CX1/(0.7D0,-0.8D0), (-0.4D0,-0.7D0), + (-0.1D0,-0.9D0), (0.2D0,-0.8D0), + (-0.9D0,-0.4D0), (0.1D0,0.4D0), (-0.6D0,0.6D0)/ DATA CY1/(0.6D0,-0.6D0), (-0.9D0,0.5D0), + (0.7D0,-0.6D0), (0.1D0,-0.5D0), (-0.1D0,-0.2D0), + (-0.5D0,-0.3D0), (0.8D0,-0.7D0)/ DATA ((CT8(I,J,1),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.32D0,-1.41D0), + (-1.55D0,0.5D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (-1.55D0,0.5D0), + (0.03D0,-0.89D0), (-0.38D0,-0.96D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT8(I,J,2),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.07D0,-0.89D0), + (-0.9D0,0.5D0), (0.42D0,-1.41D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.78D0,0.06D0), (-0.9D0,0.5D0), + (0.06D0,-0.13D0), (0.1D0,-0.5D0), + (-0.77D0,-0.49D0), (-0.5D0,-0.3D0), + (0.52D0,-1.51D0)/ DATA ((CT8(I,J,3),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.07D0,-0.89D0), + (-1.18D0,-0.31D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.78D0,0.06D0), (-1.54D0,0.97D0), + (0.03D0,-0.89D0), (-0.18D0,-1.31D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT8(I,J,4),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.32D0,-1.41D0), (-0.9D0,0.5D0), + (0.05D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.32D0,-1.41D0), + (-0.9D0,0.5D0), (0.05D0,-0.6D0), (0.1D0,-0.5D0), + (-0.77D0,-0.49D0), (-0.5D0,-0.3D0), + (0.32D0,-1.16D0)/ DATA CT7/(0.0D0,0.0D0), (-0.06D0,-0.90D0), + (0.65D0,-0.47D0), (-0.34D0,-1.22D0), + (0.0D0,0.0D0), (-0.06D0,-0.90D0), + (-0.59D0,-1.46D0), (-1.04D0,-0.04D0), + (0.0D0,0.0D0), (-0.06D0,-0.90D0), + (-0.83D0,0.59D0), (0.07D0,-0.37D0), + (0.0D0,0.0D0), (-0.06D0,-0.90D0), + (-0.76D0,-1.15D0), (-1.33D0,-1.82D0)/ DATA CT6/(0.0D0,0.0D0), (0.90D0,0.06D0), + (0.91D0,-0.77D0), (1.80D0,-0.10D0), + (0.0D0,0.0D0), (0.90D0,0.06D0), (1.45D0,0.74D0), + (0.20D0,0.90D0), (0.0D0,0.0D0), (0.90D0,0.06D0), + (-0.55D0,0.23D0), (0.83D0,-0.39D0), + (0.0D0,0.0D0), (0.90D0,0.06D0), (1.04D0,0.79D0), + (1.95D0,1.22D0)/ DATA ((CT10X(I,J,1),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.6D0,-0.6D0), (-0.9D0,0.5D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.6D0,-0.6D0), + (-0.9D0,0.5D0), (0.7D0,-0.6D0), (0.1D0,-0.5D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT10X(I,J,2),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.7D0,-0.6D0), (-0.4D0,-0.7D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.8D0,-0.7D0), + (-0.4D0,-0.7D0), (-0.1D0,-0.2D0), + (0.2D0,-0.8D0), (0.7D0,-0.6D0), (0.1D0,0.4D0), + (0.6D0,-0.6D0)/ DATA ((CT10X(I,J,3),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.9D0,0.5D0), (-0.4D0,-0.7D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.1D0,-0.5D0), + (-0.4D0,-0.7D0), (0.7D0,-0.6D0), (0.2D0,-0.8D0), + (-0.9D0,0.5D0), (0.1D0,0.4D0), (0.6D0,-0.6D0)/ DATA ((CT10X(I,J,4),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.6D0,-0.6D0), (0.7D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.6D0,-0.6D0), + (0.7D0,-0.6D0), (-0.1D0,-0.2D0), (0.8D0,-0.7D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT10Y(I,J,1),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.7D0,-0.8D0), (-0.4D0,-0.7D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.7D0,-0.8D0), + (-0.4D0,-0.7D0), (-0.1D0,-0.9D0), + (0.2D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0)/ DATA ((CT10Y(I,J,2),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.1D0,-0.9D0), (-0.9D0,0.5D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (-0.6D0,0.6D0), + (-0.9D0,0.5D0), (-0.9D0,-0.4D0), (0.1D0,-0.5D0), + (-0.1D0,-0.9D0), (-0.5D0,-0.3D0), + (0.7D0,-0.8D0)/ DATA ((CT10Y(I,J,3),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.1D0,-0.9D0), (0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (-0.6D0,0.6D0), + (-0.9D0,-0.4D0), (-0.1D0,-0.9D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0)/ DATA ((CT10Y(I,J,4),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.7D0,-0.8D0), (-0.9D0,0.5D0), + (-0.4D0,-0.7D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.7D0,-0.8D0), + (-0.9D0,0.5D0), (-0.4D0,-0.7D0), (0.1D0,-0.5D0), + (-0.1D0,-0.9D0), (-0.5D0,-0.3D0), + (0.2D0,-0.8D0)/ DATA CSIZE1/(0.0D0,0.0D0), (0.9D0,0.9D0), + (1.63D0,1.73D0), (2.90D0,2.78D0)/ DATA CSIZE3/(0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (1.17D0,1.17D0), + (1.17D0,1.17D0), (1.17D0,1.17D0), + (1.17D0,1.17D0), (1.17D0,1.17D0), + (1.17D0,1.17D0), (1.17D0,1.17D0)/ DATA CSIZE2/(0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (1.54D0,1.54D0), + (1.54D0,1.54D0), (1.54D0,1.54D0), + (1.54D0,1.54D0), (1.54D0,1.54D0), + (1.54D0,1.54D0), (1.54D0,1.54D0)/ * .. Executable Statements .. DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. initialize all argument arrays .. DO 20 I = 1, 7 CX(I) = CX1(I) CY(I) = CY1(I) 20 CONTINUE IF (ICASE.EQ.1) THEN * .. ZDOTC .. CDOT(1) = ZDOTC(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT6(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.2) THEN * .. ZDOTU .. CDOT(1) = ZDOTU(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT7(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.3) THEN * .. ZAXPY .. CALL ZAXPY(N,CA,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT8(1,KN,KI),CSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.4) THEN * .. ZCOPY .. CALL ZCOPY(N,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0D0) ELSE IF (ICASE.EQ.5) THEN * .. ZSWAP .. CALL ZSWAP(N,CX,INCX,CY,INCY) CALL CTEST(LENX,CX,CT10X(1,KN,KI),CSIZE3,1.0D0) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0D0) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF * 40 CONTINUE 60 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT DOUBLE PRECISION ZERO PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. DOUBLE PRECISION SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SD INTEGER I * .. External Functions .. DOUBLE PRECISION SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,3I5,I3,2D36.8,2D12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. DOUBLE PRECISION SCOMP1, SFAC, STRUE1 * .. Array Arguments .. DOUBLE PRECISION SSIZE(*) * .. Local Arrays .. DOUBLE PRECISION SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END DOUBLE PRECISION FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. DOUBLE PRECISION SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE CTEST(LEN,CCOMP,CTRUE,CSIZE,SFAC) * **************************** CTEST ***************************** * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. COMPLEX*16 CCOMP(LEN), CSIZE(LEN), CTRUE(LEN) * .. Local Scalars .. INTEGER I * .. Local Arrays .. DOUBLE PRECISION SCOMP(20), SSIZE(20), STRUE(20) * .. External Subroutines .. EXTERNAL STEST * .. Intrinsic Functions .. INTRINSIC DIMAG, DBLE * .. Executable Statements .. DO 20 I = 1, LEN SCOMP(2*I-1) = DBLE(CCOMP(I)) SCOMP(2*I) = DIMAG(CCOMP(I)) STRUE(2*I-1) = DBLE(CTRUE(I)) STRUE(2*I) = DIMAG(CTRUE(I)) SSIZE(2*I-1) = DBLE(CSIZE(I)) SSIZE(2*I) = DIMAG(CSIZE(I)) 20 CONTINUE * CALL STEST(2*LEN,SCOMP,STRUE,SSIZE,SFAC) RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,3I5,2I36,I12) END blis-1.1/blastest/src/fortran/zblat2.f000066400000000000000000003444131474157777200177720ustar00rootroot00000000000000*> \brief \b ZBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM ZBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX*16 Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 17 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 35 lines: *> 'zblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. *> ZGERC T PUT F FOR NO TEST. SAME COLUMNS. *> ZGERU T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER T PUT F FOR NO TEST. SAME COLUMNS. *> ZHPR T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. *> ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. *> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex16_blas_testing * * ===================================================================== PROGRAM ZBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 17 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LZE EXTERNAL DDIFF, LZE * .. External Subroutines .. EXTERNAL ZCHK1, ZCHK2, ZCHK3, ZCHK4, ZCHK5, ZCHK6, $ ZCHKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'ZGEMV ', 'ZGBMV ', 'ZHEMV ', 'ZHBMV ', $ 'ZHPMV ', 'ZTRMV ', 'ZTBMV ', 'ZTPMV ', $ 'ZTRSV ', 'ZTBSV ', 'ZTPSV ', 'ZGERC ', $ 'ZGERU ', 'ZHER ', 'ZHPR ', 'ZHER2 ', $ 'ZHPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from ZMVCH YT holds * the result computed by ZMVCH. TRANS = 'N' CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL ZCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 170, 180, $ 180, 190, 190 )ISNUM * Test ZGEMV, 01, and ZGBMV, 02. 140 CALL ZCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test ZHEMV, 03, ZHBMV, 04, and ZHPMV, 05. 150 CALL ZCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test ZTRMV, 06, ZTBMV, 07, ZTPMV, 08, * ZTRSV, 09, ZTBSV, 10, and ZTPSV, 11. 160 CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test ZGERC, 12, ZGERU, 13. 170 CALL ZCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test ZHER, 14, and ZHPR, 15. 180 CALL ZCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test ZHER2, 16, and ZHPR2, 17. 190 CALL ZCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE COMPLEX*16 LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 9989 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9988 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN ZMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' ZMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of ZBLAT2. * END SUBROUTINE ZCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests ZGEMV and ZGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BLS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZGBMV, ZGEMV, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL ZGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL ZGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LZE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LZE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LZE( YS, YY, LY ) ELSE ISAME( 10 ) = LZERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LZE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LZE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LZE( YS, YY, LY ) ELSE ISAME( 12 ) = LZERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL ZMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK1. * END SUBROUTINE ZCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests ZHEMV, ZHBMV and ZHPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BLS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHBMV, ZHEMV, ZHPMV, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL ZHEMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL ZHBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL ZHPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LZE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LZE( YS, YY, LY ) ELSE ISAME( 9 ) = LZERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LZE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LZE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LZE( YS, YY, LY ) ELSE ISAME( 10 ) = LZERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( AS, AA, LAA ) ISAME( 5 ) = LZE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LZE( YS, YY, LY ) ELSE ISAME( 8 ) = LZERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL ZMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), AP, X,', I2, ',(', F4.1, ',', F4.1, '), Y,', I2, $ ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', F4.1, '), ', $ 'Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK2. * END SUBROUTINE ZCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests ZTRMV, ZTBMV, ZTPMV, ZTRSV, ZTBSV and ZTPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XT( NMAX ), XX( NMAX*INCMAX ), Z( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX*16 TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZMAKE, ZMVCH, ZTBMV, ZTBSV, ZTPMV, ZTPSV, $ ZTRMV, ZTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero vector for ZMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL ZTPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL ZTPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LZE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LZE( XS, XX, LX ) ELSE ISAME( 7 ) = LZERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LZE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LZE( XS, XX, LX ) ELSE ISAME( 8 ) = LZERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LZE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LZE( XS, XX, LX ) ELSE ISAME( 6 ) = LZERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK3. * END SUBROUTINE ZCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests ZGERC and ZGERU. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL CONJ, NULL, RESET, SAME * .. Local Arrays .. COMPLEX*16 W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZGERC, ZGERU, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, DCONJG, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. CONJ = SNAME( 5: 5 ).EQ.'C' * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( CONJ )THEN IF( REWI ) $ REWIND NTRA CALL ZGERC( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) ELSE IF( REWI ) $ REWIND NTRA CALL ZGERU( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LZE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LZE( AS, AA, LAA ) ELSE ISAME( 8 ) = LZERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF IF( CONJ ) $ W( 1 ) = DCONJG( W( 1 ) ) CALL ZMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), '(', F4.1, ',', F4.1, $ '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK4. * END SUBROUTINE ZCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests ZHER and ZHPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX*16 ALPHA, TRANSL DOUBLE PRECISION ERR, ERRMAX, RALPHA, RALS INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX*16 W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHER, ZHPR, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF RALPHA = DBLE( ALF( IA ) ) ALPHA = DCMPLX( RALPHA, RZERO ) NULL = N.LE.0.OR.RALPHA.EQ.RZERO * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N RALS = RALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ RALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL ZHER( UPLO, N, RALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ RALPHA, INCX IF( REWI ) $ REWIND NTRA CALL ZHPR( UPLO, N, RALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = RALS.EQ.RALPHA ISAME( 4 ) = LZE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LZE( AS, AA, LAA ) ELSE ISAME( 6 ) = LZERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = DCONJG( Z( J ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL ZMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, RALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, RALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK5. * END SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests ZHER2 and ZHPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX*16 W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHER2, ZHPR2, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, DCONJG, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL ZHER2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL ZHPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LZE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LZE( AS, AA, LAA ) ELSE ISAME( 8 ) = LZERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = ALPHA*DCONJG( Z( J, 2 ) ) W( 2 ) = DCONJG( ALPHA )*DCONJG( Z( J, 1 ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL ZMVCH( 'N', LJ, 2, ONE, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', AP) ', $ ' .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK6. * END SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA * .. Local Arrays .. COMPLEX*16 A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. EXTERNAL CHKXER, ZGBMV, ZGEMV, ZGERC, ZGERU, ZHBMV, $ ZHEMV, ZHER, ZHER2, ZHPMV, ZHPR, ZHPR2, ZTBMV, $ ZTBSV, ZTPMV, ZTPSV, ZTRMV, ZTRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, 160, $ 170 )ISNUM 10 INFOT = 1 CALL ZGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 20 INFOT = 1 CALL ZGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 30 INFOT = 1 CALL ZHEMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHEMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHEMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHEMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 40 INFOT = 1 CALL ZHBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZHBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZHBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZHBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 50 INFOT = 1 CALL ZHPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZHPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 60 INFOT = 1 CALL ZTRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZTRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 70 INFOT = 1 CALL ZTBMV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 80 INFOT = 1 CALL ZTPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 90 INFOT = 1 CALL ZTRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRSV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZTRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 100 INFOT = 1 CALL ZTBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 110 INFOT = 1 CALL ZTPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 120 INFOT = 1 CALL ZGERC( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGERC( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGERC( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZGERC( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZGERC( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 130 INFOT = 1 CALL ZGERU( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGERU( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGERU( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZGERU( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZGERU( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 140 INFOT = 1 CALL ZHER( '/', 0, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHER( 'U', -1, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHER( 'U', 0, RALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER( 'U', 2, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 150 INFOT = 1 CALL ZHPR( '/', 0, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHPR( 'U', -1, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHPR( 'U', 0, RALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 160 INFOT = 1 CALL ZHER2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHER2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHER2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 170 INFOT = 1 CALL ZHPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 180 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of ZCHKE. * END SUBROUTINE ZMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) COMPLEX*16 ROGUE PARAMETER ( ROGUE = ( -1.0D10, 1.0D10 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) DOUBLE PRECISION RROGUE PARAMETER ( RROGUE = -1.0D10 ) * .. Scalar Arguments .. COMPLEX*16 TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX*16 A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, JJ, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. COMPLEX*16 ZBEG EXTERNAL ZBEG * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MIN * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'H' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. $ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = ZBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = DCONJG( A( I, J ) ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( SYM ) $ A( J, J ) = DCMPLX( DBLE( A( J, J ) ), RZERO ) IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE IF( SYM )THEN JJ = J + ( J - 1 )*LDA AA( JJ ) = DCMPLX( DBLE( AA( JJ ) ), RROGUE ) END IF 130 CONTINUE ELSE IF( TYPE.EQ.'HB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE IF( SYM )THEN JJ = KK + ( J - 1 )*LDA AA( JJ ) = DCMPLX( DBLE( AA( JJ ) ), RROGUE ) END IF 170 CONTINUE ELSE IF( TYPE.EQ.'HP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE IF( SYM ) $ AA( IOFF ) = DCMPLX( DBLE( AA( IOFF ) ), RROGUE ) END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of ZMAKE. * END SUBROUTINE ZMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D0, RONE = 1.0D0 ) * .. Scalar Arguments .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. COMPLEX*16 A( NMAX, * ), X( * ), Y( * ), YT( * ), YY( * ) DOUBLE PRECISION G( * ) * .. Local Scalars .. COMPLEX*16 C DOUBLE PRECISION ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL CTRAN, TRAN * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, SQRT * .. Statement Functions .. DOUBLE PRECISION ABS1 * .. Statement Function definitions .. ABS1( C ) = ABS( DBLE( C ) ) + ABS( DIMAG( C ) ) * .. Executable Statements .. TRAN = TRANS.EQ.'T' CTRAN = TRANS.EQ.'C' IF( TRAN.OR.CTRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. * IY = KY DO 40 I = 1, ML YT( IY ) = ZERO G( IY ) = RZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE IF( CTRAN )THEN DO 20 J = 1, NL YT( IY ) = YT( IY ) + DCONJG( A( J, I ) )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 20 CONTINUE ELSE DO 30 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS1( A( I, J ) )*ABS1( X( JX ) ) JX = JX + INCXL 30 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS1( ALPHA )*G( IY ) + ABS1( BETA )*ABS1( Y( IY ) ) IY = IY + INCYL 40 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 50 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.RZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ GO TO 60 50 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 80 * * Report fatal error. * 60 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 70 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT( I ) END IF 70 CONTINUE * 80 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RE', $ 'SULT COMPUTED RESULT' ) 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) * * End of ZMVCH. * END LOGICAL FUNCTION LZE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. COMPLEX*16 RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LZE = .TRUE. GO TO 30 20 CONTINUE LZE = .FALSE. 30 RETURN * * End of LZE. * END LOGICAL FUNCTION LZERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'HE' or 'HP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX*16 AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'HE' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LZERES = .TRUE. GO TO 80 70 CONTINUE LZERES = .FALSE. 80 RETURN * * End of LZERES. * END COMPLEX*16 FUNCTION ZBEG( RESET ) * * Generates complex numbers as pairs of random numbers uniformly * distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, J, MI, MJ * .. Save statement .. SAVE I, IC, J, MI, MJ * .. Intrinsic Functions .. INTRINSIC DCMPLX * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 MJ = 457 I = 7 J = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I or J is bounded between 1 and 999. * If initial I or J = 1,2,3,6,7 or 9, the period will be 50. * If initial I or J = 4 or 8, the period will be 25. * If initial I or J = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I or J * in 6. * IC = IC + 1 10 I = I*MI J = J*MJ I = I - 1000*( I/1000 ) J = J - 1000*( J/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF ZBEG = DCMPLX( ( I - 500 )/1001.0D0, ( J - 500 )/1001.0D0 ) RETURN * * End of ZBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/fortran/zblat3.f000066400000000000000000004016331474157777200177710ustar00rootroot00000000000000*> \brief \b ZBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM ZBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX*16 Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 9 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 23 lines: *> 'zblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> ZHERK T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex16_blas_testing * * ===================================================================== PROGRAM ZBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. COMPLEX*16 AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ W( 2*NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LZE EXTERNAL DDIFF, LZE * .. External Subroutines .. EXTERNAL ZCHK1, ZCHK2, ZCHK3, ZCHK4, ZCHK5, ZCHKE, ZMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'ZGEMM ', 'ZHEMM ', 'ZSYMM ', 'ZTRMM ', $ 'ZTRSM ', 'ZHERK ', 'ZSYRK ', 'ZHER2K', $ 'ZSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from ZMMCH CT holds * the result computed by ZMMCH. TRANSA = 'N' TRANSB = 'N' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'C' TRANSB = 'N' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL ZCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, $ 180, 180 )ISNUM * Test ZGEMM, 01. 140 CALL ZCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test ZHEMM, 02, ZSYMM, 03. 150 CALL ZCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test ZTRMM, 04, ZTRSM, 05. 160 CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test ZHERK, 06, ZSYRK, 07. 170 CALL ZCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test ZHER2K, 08, ZSYR2K, 09. 180 CALL ZCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE COMPLEX*16 LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9992 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN ZMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' ZMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of ZBLAT3. * END SUBROUTINE ZCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests ZGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BLS DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZGEMM, ZMAKE, ZMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL ZMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL ZMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL ZMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL ZGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LZE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LZE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LZE( CS, CC, LCC ) ELSE ISAME( 12 ) = LZERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL ZMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK1. * END SUBROUTINE ZCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests ZHEMM and ZSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BLS DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL CONJ, LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHEMM, ZMAKE, ZMMCH, ZSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL ZMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the hermitian or symmetric matrix A. * CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', NA, NA, A, NMAX, $ AA, LDA, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL ZMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA IF( CONJ )THEN CALL ZHEMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) ELSE CALL ZSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LZE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LZE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LZE( CS, CC, LCC ) ELSE ISAME( 11 ) = LZERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL ZMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL ZMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, $ ',', F4.1, '), C,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK2. * END SUBROUTINE ZCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests ZTRMM and ZTRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZMAKE, ZMMCH, ZTRMM, ZTRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero matrix for ZMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL ZMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL ZMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL ZTRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL ZTRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LZE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LZE( BS, BB, LBB ) ELSE ISAME( 10 ) = LZERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL ZMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL ZMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. * DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL ZMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL ZMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK3. * END SUBROUTINE ZCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests ZHERK and ZSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RONE, RZERO PARAMETER ( RONE = 1.0D0, RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BETS DOUBLE PRECISION ERR, ERRMAX, RALPHA, RALS, RBETA, RBETS INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL CONJ, NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, TRANST, UPLO, UPLOS CHARACTER*2 ICHT, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHERK, ZMAKE, ZMMCH, ZSYRK * .. Intrinsic Functions .. INTRINSIC DCMPLX, MAX, DBLE * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NC'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 2 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'C' IF( TRAN.AND..NOT.CONJ ) $ TRANS = 'T' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL ZMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) IF( CONJ )THEN RALPHA = DBLE( ALPHA ) ALPHA = DCMPLX( RALPHA, RZERO ) END IF * DO 50 IB = 1, NBET BETA = BET( IB ) IF( CONJ )THEN RBETA = DBLE( BETA ) BETA = DCMPLX( RBETA, RZERO ) END IF NULL = N.LE.0 IF( CONJ ) $ NULL = NULL.OR.( ( K.LE.0.OR.RALPHA.EQ. $ RZERO ).AND.RBETA.EQ.RONE ) * * Generate the matrix C. * CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C, $ NMAX, CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K IF( CONJ )THEN RALS = RALPHA ELSE ALS = ALPHA END IF DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA IF( CONJ )THEN RBETS = RBETA ELSE BETS = BETA END IF DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( CONJ )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, RALPHA, LDA, RBETA, LDC IF( REWI ) $ REWIND NTRA CALL ZHERK( UPLO, TRANS, N, K, RALPHA, AA, $ LDA, RBETA, CC, LDC ) ELSE IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL ZSYRK( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K IF( CONJ )THEN ISAME( 5 ) = RALS.EQ.RALPHA ELSE ISAME( 5 ) = ALS.EQ.ALPHA END IF ISAME( 6 ) = LZE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( CONJ )THEN ISAME( 8 ) = RBETS.EQ.RBETA ELSE ISAME( 8 ) = BETS.EQ.BETA END IF IF( NULL )THEN ISAME( 9 ) = LZE( CS, CC, LCC ) ELSE ISAME( 9 ) = LZERES( SNAME( 2: 3 ), UPLO, N, $ N, CS, CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( CONJ )THEN TRANST = 'C' ELSE TRANST = 'T' END IF JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL ZMMCH( TRANST, 'N', LJ, 1, K, $ ALPHA, A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL ZMMCH( 'N', TRANST, LJ, 1, K, $ ALPHA, A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( CONJ )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, RALPHA, $ LDA, RBETA, LDC ELSE WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, BETA, LDC END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1, $ '), C,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK4. * END SUBROUTINE ZCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) * * Tests ZHER2K and ZSYR2K. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RONE, RZERO PARAMETER ( RONE = 1.0D0, RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ), $ ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ), $ BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ W( 2*NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BETS DOUBLE PRECISION ERR, ERRMAX, RBETA, RBETS INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB, $ K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS, $ LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS LOGICAL CONJ, NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, TRANST, UPLO, UPLOS CHARACTER*2 ICHT, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHER2K, ZMAKE, ZMMCH, ZSYR2K * .. Intrinsic Functions .. INTRINSIC DCMPLX, DCONJG, MAX, DBLE * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NC'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 130 LCC = LDC*N * DO 120 IK = 1, NIDIM K = IDIM( IK ) * DO 110 ICT = 1, 2 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'C' IF( TRAN.AND..NOT.CONJ ) $ TRANS = 'T' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*NA * * Generate the matrix A. * IF( TRAN )THEN CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA, $ LDA, RESET, ZERO ) ELSE CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA, $ RESET, ZERO ) END IF * * Generate the matrix B. * LDB = LDA LBB = LAA IF( TRAN )THEN CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ), $ 2*NMAX, BB, LDB, RESET, ZERO ) ELSE CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ), $ NMAX, BB, LDB, RESET, ZERO ) END IF * DO 100 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 90 IA = 1, NALF ALPHA = ALF( IA ) * DO 80 IB = 1, NBET BETA = BET( IB ) IF( CONJ )THEN RBETA = DBLE( BETA ) BETA = DCMPLX( RBETA, RZERO ) END IF NULL = N.LE.0 IF( CONJ ) $ NULL = NULL.OR.( ( K.LE.0.OR.ALPHA.EQ. $ ZERO ).AND.RBETA.EQ.RONE ) * * Generate the matrix C. * CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C, $ NMAX, CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB IF( CONJ )THEN RBETS = RBETA ELSE BETS = BETA END IF DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( CONJ )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, RBETA, LDC IF( REWI ) $ REWIND NTRA CALL ZHER2K( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BB, LDB, RBETA, CC, LDC ) ELSE IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL ZSYR2K( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BB, LDB, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LZE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LZE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB IF( CONJ )THEN ISAME( 10 ) = RBETS.EQ.RBETA ELSE ISAME( 10 ) = BETS.EQ.BETA END IF IF( NULL )THEN ISAME( 11 ) = LZE( CS, CC, LCC ) ELSE ISAME( 11 ) = LZERES( 'HE', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( CONJ )THEN TRANST = 'C' ELSE TRANST = 'T' END IF JJAB = 1 JC = 1 DO 70 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN DO 50 I = 1, K W( I ) = ALPHA*AB( ( J - 1 )*2* $ NMAX + K + I ) IF( CONJ )THEN W( K + I ) = DCONJG( ALPHA )* $ AB( ( J - 1 )*2* $ NMAX + I ) ELSE W( K + I ) = ALPHA* $ AB( ( J - 1 )*2* $ NMAX + I ) END IF 50 CONTINUE CALL ZMMCH( TRANST, 'N', LJ, 1, 2*K, $ ONE, AB( JJAB ), 2*NMAX, W, $ 2*NMAX, BETA, C( JJ, J ), $ NMAX, CT, G, CC( JC ), LDC, $ EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE DO 60 I = 1, K IF( CONJ )THEN W( I ) = ALPHA*DCONJG( AB( ( K + $ I - 1 )*NMAX + J ) ) W( K + I ) = DCONJG( ALPHA* $ AB( ( I - 1 )*NMAX + $ J ) ) ELSE W( I ) = ALPHA*AB( ( K + I - 1 )* $ NMAX + J ) W( K + I ) = ALPHA* $ AB( ( I - 1 )*NMAX + $ J ) END IF 60 CONTINUE CALL ZMMCH( 'N', 'N', LJ, 1, 2*K, ONE, $ AB( JJ ), NMAX, W, 2*NMAX, $ BETA, C( JJ, J ), NMAX, CT, $ G, CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 IF( TRAN ) $ JJAB = JJAB + 2*NMAX END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 140 70 CONTINUE END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( CONJ )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, RBETA, LDC ELSE WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, BETA, LDC END IF * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1, $ ', C,', I3, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, $ ',', F4.1, '), C,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK5. * END SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. * A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) * 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM * with INFOT = 9 (eca) * 10-9-00: Declared INTRINSIC DCMPLX (susan) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA, RBETA * .. Local Arrays .. COMPLEX*16 A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL ZGEMM, ZHEMM, ZHER2K, ZHERK, CHKXER, ZSYMM, $ ZSYR2K, ZSYRK, ZTRMM, ZTRSM * .. Intrinsic Functions .. INTRINSIC DCMPLX * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA, BETA, RALPHA, and RBETA. * ALPHA = DCMPLX( ONE, -ONE ) BETA = DCMPLX( TWO, -TWO ) RALPHA = ONE RBETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 CALL ZGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL ZGEMM( '/', 'C', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL ZGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMM( 'C', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'N', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'C', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'C', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'C', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'T', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'N', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'C', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'C', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'C', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'T', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'N', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'C', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'C', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'C', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'T', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'C', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'C', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'T', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'N', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'C', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'T', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'C', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'C', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'C', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'C', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'T', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 20 INFOT = 1 CALL ZHEMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHEMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 30 INFOT = 1 CALL ZSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 40 INFOT = 1 CALL ZTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 50 INFOT = 1 CALL ZTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 60 INFOT = 1 CALL ZHERK( '/', 'N', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHERK( 'U', 'T', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'U', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'U', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'L', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'L', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'U', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'U', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'L', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'L', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'U', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'U', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'L', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'L', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'U', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'U', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'L', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'L', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 70 INFOT = 1 CALL ZSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZSYRK( 'U', 'C', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 80 INFOT = 1 CALL ZHER2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHER2K( 'U', 'T', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'U', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'L', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'U', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'L', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'U', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'L', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'U', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'L', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'U', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'L', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 90 INFOT = 1 CALL ZSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZSYR2K( 'U', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 100 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of ZCHKE. * END SUBROUTINE ZMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET, $ TRANSL ) * * Generates values for an M by N matrix A. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'HE', 'SY' or 'TR'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) COMPLEX*16 ROGUE PARAMETER ( ROGUE = ( -1.0D10, 1.0D10 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) DOUBLE PRECISION RROGUE PARAMETER ( RROGUE = -1.0D10 ) * .. Scalar Arguments .. COMPLEX*16 TRANSL INTEGER LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX*16 A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J, JJ LOGICAL GEN, HER, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. COMPLEX*16 ZBEG EXTERNAL ZBEG * .. Intrinsic Functions .. INTRINSIC DCMPLX, DCONJG, DBLE * .. Executable Statements .. GEN = TYPE.EQ.'GE' HER = TYPE.EQ.'HE' SYM = TYPE.EQ.'SY' TRI = TYPE.EQ.'TR' UPPER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN A( I, J ) = ZBEG( RESET ) + TRANSL IF( I.NE.J )THEN * Set some elements to zero IF( N.GT.3.AND.J.EQ.N/2 ) $ A( I, J ) = ZERO IF( HER )THEN A( J, I ) = DCONJG( A( I, J ) ) ELSE IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( HER ) $ A( J, J ) = DCMPLX( DBLE( A( J, J ) ), RZERO ) IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 90 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 60 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 70 CONTINUE DO 80 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE IF( HER )THEN JJ = J + ( J - 1 )*LDA AA( JJ ) = DCMPLX( DBLE( AA( JJ ) ), RROGUE ) END IF 90 CONTINUE END IF RETURN * * End of ZMAKE. * END SUBROUTINE ZMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB, $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, $ NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D0, RONE = 1.0D0 ) * .. Scalar Arguments .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION EPS, ERR INTEGER KK, LDA, LDB, LDC, LDCC, M, N, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANSA, TRANSB * .. Array Arguments .. COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), $ CC( LDCC, * ), CT( * ) DOUBLE PRECISION G( * ) * .. Local Scalars .. COMPLEX*16 CL DOUBLE PRECISION ERRI INTEGER I, J, K LOGICAL CTRANA, CTRANB, TRANA, TRANB * .. Intrinsic Functions .. INTRINSIC ABS, DIMAG, DCONJG, MAX, DBLE, SQRT * .. Statement Functions .. DOUBLE PRECISION ABS1 * .. Statement Function definitions .. ABS1( CL ) = ABS( DBLE( CL ) ) + ABS( DIMAG( CL ) ) * .. Executable Statements .. TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' CTRANA = TRANSA.EQ.'C' CTRANB = TRANSB.EQ.'C' * * Compute expected result, one column at a time, in CT using data * in A, B and C. * Compute gauges in G. * DO 220 J = 1, N * DO 10 I = 1, M CT( I ) = ZERO G( I ) = RZERO 10 CONTINUE IF( .NOT.TRANA.AND..NOT.TRANB )THEN DO 30 K = 1, KK DO 20 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( K, J ) G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA.AND..NOT.TRANB )THEN IF( CTRANA )THEN DO 50 K = 1, KK DO 40 I = 1, M CT( I ) = CT( I ) + DCONJG( A( K, I ) )*B( K, J ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( K, J ) ) 40 CONTINUE 50 CONTINUE ELSE DO 70 K = 1, KK DO 60 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( K, J ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( K, J ) ) 60 CONTINUE 70 CONTINUE END IF ELSE IF( .NOT.TRANA.AND.TRANB )THEN IF( CTRANB )THEN DO 90 K = 1, KK DO 80 I = 1, M CT( I ) = CT( I ) + A( I, K )*DCONJG( B( J, K ) ) G( I ) = G( I ) + ABS1( A( I, K ) )* $ ABS1( B( J, K ) ) 80 CONTINUE 90 CONTINUE ELSE DO 110 K = 1, KK DO 100 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( J, K ) G( I ) = G( I ) + ABS1( A( I, K ) )* $ ABS1( B( J, K ) ) 100 CONTINUE 110 CONTINUE END IF ELSE IF( TRANA.AND.TRANB )THEN IF( CTRANA )THEN IF( CTRANB )THEN DO 130 K = 1, KK DO 120 I = 1, M CT( I ) = CT( I ) + DCONJG( A( K, I ) )* $ DCONJG( B( J, K ) ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 120 CONTINUE 130 CONTINUE ELSE DO 150 K = 1, KK DO 140 I = 1, M CT( I ) = CT( I ) + DCONJG( A( K, I ) )* $ B( J, K ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 140 CONTINUE 150 CONTINUE END IF ELSE IF( CTRANB )THEN DO 170 K = 1, KK DO 160 I = 1, M CT( I ) = CT( I ) + A( K, I )* $ DCONJG( B( J, K ) ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 160 CONTINUE 170 CONTINUE ELSE DO 190 K = 1, KK DO 180 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( J, K ) G( I ) = G( I ) + ABS1( A( K, I ) )* $ ABS1( B( J, K ) ) 180 CONTINUE 190 CONTINUE END IF END IF END IF DO 200 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) G( I ) = ABS1( ALPHA )*G( I ) + $ ABS1( BETA )*ABS1( C( I, J ) ) 200 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 210 I = 1, M ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS IF( G( I ).NE.RZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ GO TO 230 210 CONTINUE * 220 CONTINUE * * If the loop completes, all results are at least half accurate. GO TO 250 * * Report fatal error. * 230 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 240 I = 1, M IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) ELSE WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) END IF 240 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9997 )J * 250 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RE', $ 'SULT COMPUTED RESULT' ) 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) * * End of ZMMCH. * END LOGICAL FUNCTION LZE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. COMPLEX*16 RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LZE = .TRUE. GO TO 30 20 CONTINUE LZE = .FALSE. 30 RETURN * * End of LZE. * END LOGICAL FUNCTION LZERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE' or 'HE' or 'SY'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX*16 AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LZERES = .TRUE. GO TO 80 70 CONTINUE LZERES = .FALSE. 80 RETURN * * End of LZERES. * END COMPLEX*16 FUNCTION ZBEG( RESET ) * * Generates complex numbers as pairs of random numbers uniformly * distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, J, MI, MJ * .. Save statement .. SAVE I, IC, J, MI, MJ * .. Intrinsic Functions .. INTRINSIC DCMPLX * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 MJ = 457 I = 7 J = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I or J is bounded between 1 and 999. * If initial I or J = 1,2,3,6,7 or 9, the period will be 50. * If initial I or J = 4 or 8, the period will be 25. * If initial I or J = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I or J * in 6. * IC = IC + 1 10 I = I*MI J = J*MJ I = I - 1000*( I/1000 ) J = J - 1000*( J/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF ZBEG = DCMPLX( ( I - 500 )/1001.0D0, ( J - 500 )/1001.0D0 ) RETURN * * End of ZBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-1.1/blastest/src/sblat1.c000066400000000000000000001135231474157777200163000ustar00rootroot00000000000000/* sblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static real c_b35 = 1.f; static real c_b39 = .1f; static integer c__5 = 5; static real c_b63 = 0.f; /* > \brief \b SBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM SBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the REAL Level 1 BLAS. */ /* > */ /* > Based upon the original BLAS test routine together with: */ /* > F06EAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup single_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "sblat1"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static real sfac = 9.765625e-4f; /* Format strings */ static char fmt_99999[] = "(\002 Real BLAS Test Program Results\002,/1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check0_(real *), check1_(real *), check2_( real *), check3_(real *), header_(void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 13; ++ic) { combla_1.icase = ic; header_(); /* .. Initialize PASS, INCX, and INCY for a new case. .. */ /* .. the value 9999 for INCX or INCY will appear in the .. */ /* .. detailed output, if any, for cases that do not involve .. */ /* .. these parameters .. */ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; if (combla_1.icase == 3 || combla_1.icase == 11) { check0_(&sfac); } else if (combla_1.icase == 7 || combla_1.icase == 8 || combla_1.icase == 9 || combla_1.icase == 10) { check1_(&sfac); } else if (combla_1.icase == 1 || combla_1.icase == 2 || combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase == 12 || combla_1.icase == 13) { check2_(&sfac); } else if (combla_1.icase == 4) { check3_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*13] = " SDOT " "SAXPY " "SROTG " " SROT " "SCOPY " "SSWA" "P " "SNRM2 " "SASUM " "SSCAL " "ISAMAX" "SROTMG" "SROTM " "SDSDOT" ; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check0_(real *sfac) { /* Initialized data */ static real ds1[8] = { .8f,.6f,.8f,-.6f,.8f,0.f,1.f,0.f }; static real datrue[8] = { .5f,.5f,.5f,-.5f,-.5f,0.f,1.f,1.f }; static real dbtrue[8] = { 0.f,.6f,0.f,-.6f,0.f,0.f,1.f,0.f }; static real dab[36] /* was [4][9] */ = { .1f,.3f,1.2f,.2f,.7f,.2f,.6f, 4.2f,0.f,0.f,0.f,0.f,4.f,-1.f,2.f,4.f,6e-10f,.02f,1e5f,10.f,4e10f, .02f,1e-5f,10.f,2e-10f,.04f,1e5f,10.f,2e10f,.04f,1e-5f,10.f,4.f, -2.f,8.f,4.f }; static real dtrue[81] /* was [9][9] */ = { 0.f,0.f,1.3f,.2f,0.f,0.f, 0.f,.5f,0.f,0.f,0.f,4.5f,4.2f,1.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,0.f, -2.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,4.f,-1.f,0.f,0.f,0.f,0.f,0.f, .015f,0.f,10.f,-1.f,0.f,-1e-4f,0.f,1.f,0.f,0.f,.06144f,10.f,-1.f, 4096.f,-1e6f,0.f,1.f,0.f,0.f,15.f,10.f,-1.f,5e-5f,0.f,1.f,0.f,0.f, 0.f,15.f,10.f,-1.f,5e5f,-4096.f,1.f,.004096f,0.f,0.f,7.f,4.f,0.f, 0.f,-.5f,-.25f,0.f }; static real d12 = 4096.f; static real da1[8] = { .3f,.4f,-.3f,-.4f,-.3f,0.f,0.f,1.f }; static real db1[8] = { .4f,.3f,.4f,.3f,-.4f,0.f,1.f,0.f }; static real dc1[8] = { .6f,.8f,-.6f,.8f,.6f,1.f,0.f,1.f }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k; real sa, sb, sc, ss, dtemp[9]; extern /* Subroutine */ int srotg_(real *, real *, real *, real *), stest_(integer *, real *, real *, real *, real *), stest1_(real *, real *, real *, real *), srotmg_(real *, real *, real *, real *, real *); /* Fortran I/O blocks */ static cilist io___23 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* INPUT FOR MODIFIED GIVENS */ /* TRUE RESULTS FOR MODIFIED GIVENS */ /* 4096 = 2 ** 12 */ dtrue[0] = .092307692307692313f; dtrue[1] = .27692307692307694f; dtrue[6] = -.16666666666666666f; dtrue[9] = .18666666666666668f; dtrue[10] = .65333333333333332f; dtrue[17] = .14285714285714285f; dtrue[36] = d12 * d12 * 4.5e-10f; dtrue[38] = 4e5f / (d12 * 3.f); dtrue[41] = 1.f / d12; dtrue[43] = 1e4f / (d12 * 3.f); dtrue[45] = 4e10f / (d12 * 1.5f * d12); dtrue[46] = .013333333333333334f; dtrue[52] = d12 * 5e-7f; dtrue[54] = .026666666666666668f; dtrue[55] = d12 * d12 * 1.3333333333333334e-10f; dtrue[60] = -dtrue[41]; dtrue[62] = 1e4f / d12; dtrue[63] = dtrue[54]; dtrue[64] = 2e10f / (d12 * 1.5f * d12); dtrue[72] = 4.5714285714285712f; dtrue[73] = -2.2857142857142856f; /* .. Executable Statements .. */ /* Compute true values which cannot be prestored */ /* in decimal notation */ dbtrue[0] = 1.6666666666666667f; dbtrue[2] = -1.6666666666666667f; dbtrue[4] = 1.6666666666666667f; for (k = 1; k <= 8; ++k) { /* .. Set N=K for identification in output if any .. */ combla_1.n = k; if (combla_1.icase == 3) { /* .. SROTG .. */ if (k > 8) { goto L40; } sa = da1[k - 1]; sb = db1[k - 1]; srotg_(&sa, &sb, &sc, &ss); stest1_(&sa, &datrue[k - 1], &datrue[k - 1], sfac); stest1_(&sb, &dbtrue[k - 1], &dbtrue[k - 1], sfac); stest1_(&sc, &dc1[k - 1], &dc1[k - 1], sfac); stest1_(&ss, &ds1[k - 1], &ds1[k - 1], sfac); } else if (combla_1.icase == 11) { /* .. SROTMG .. */ for (i__ = 1; i__ <= 4; ++i__) { dtemp[i__ - 1] = dab[i__ + (k << 2) - 5]; dtemp[i__ + 3] = 0.f; } dtemp[8] = 0.f; srotmg_(dtemp, &dtemp[1], &dtemp[2], &dtemp[3], &dtemp[4]); stest_(&c__9, dtemp, &dtrue[k * 9 - 9], &dtrue[k * 9 - 9], sfac); } else { s_wsle(&io___23); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK0", (ftnlen)28); e_wsle(); s_stop("", (ftnlen)0); } /* L20: */ } L40: return 0; } /* check0_ */ /* Subroutine */ int check1_(real *sfac) { /* Initialized data */ static real sa[10] = { .3f,-1.f,0.f,1.f,.3f,.3f,.3f,.3f,.3f,.3f }; static real dv[80] /* was [8][5][2] */ = { .1f,2.f,2.f,2.f,2.f,2.f,2.f, 2.f,.3f,3.f,3.f,3.f,3.f,3.f,3.f,3.f,.3f,-.4f,4.f,4.f,4.f,4.f,4.f, 4.f,.2f,-.6f,.3f,5.f,5.f,5.f,5.f,5.f,.1f,-.3f,.5f,-.1f,6.f,6.f, 6.f,6.f,.1f,8.f,8.f,8.f,8.f,8.f,8.f,8.f,.3f,9.f,9.f,9.f,9.f,9.f, 9.f,9.f,.3f,2.f,-.4f,2.f,2.f,2.f,2.f,2.f,.2f,3.f,-.6f,5.f,.3f,2.f, 2.f,2.f,.1f,4.f,-.3f,6.f,-.5f,7.f,-.1f,3.f }; static real dtrue1[5] = { 0.f,.3f,.5f,.7f,.6f }; static real dtrue3[5] = { 0.f,.3f,.7f,1.1f,1.f }; static real dtrue5[80] /* was [8][5][2] */ = { .1f,2.f,2.f,2.f,2.f, 2.f,2.f,2.f,-.3f,3.f,3.f,3.f,3.f,3.f,3.f,3.f,0.f,0.f,4.f,4.f,4.f, 4.f,4.f,4.f,.2f,-.6f,.3f,5.f,5.f,5.f,5.f,5.f,.03f,-.09f,.15f, -.03f,6.f,6.f,6.f,6.f,.1f,8.f,8.f,8.f,8.f,8.f,8.f,8.f,.09f,9.f, 9.f,9.f,9.f,9.f,9.f,9.f,.09f,2.f,-.12f,2.f,2.f,2.f,2.f,2.f,.06f, 3.f,-.18f,5.f,.09f,2.f,2.f,2.f,.03f,4.f,-.09f,6.f,-.15f,7.f,-.03f, 3.f }; static integer itrue2[5] = { 0,1,2,2,3 }; /* System generated locals */ integer i__1; real r__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; real sx[8]; integer np1, len; extern real snrm2_(integer *, real *, integer *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); real stemp[1]; extern real sasum_(integer *, real *, integer *); real strue[8]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *), itest1_(integer *, integer *), stest1_(real *, real *, real *, real *); extern integer isamax_(integer *, real *, integer *); /* Fortran I/O blocks */ static cilist io___36 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { sx[i__ - 1] = dv[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L20: */ } if (combla_1.icase == 7) { /* .. SNRM2 .. */ stemp[0] = dtrue1[np1 - 1]; r__1 = snrm2_(&combla_1.n, sx, &combla_1.incx); stest1_(&r__1, stemp, stemp, sfac); } else if (combla_1.icase == 8) { /* .. SASUM .. */ stemp[0] = dtrue3[np1 - 1]; r__1 = sasum_(&combla_1.n, sx, &combla_1.incx); stest1_(&r__1, stemp, stemp, sfac); } else if (combla_1.icase == 9) { /* .. SSCAL .. */ sscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], sx, &combla_1.incx); i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L40: */ } stest_(&len, sx, strue, strue, sfac); } else if (combla_1.icase == 10) { /* .. ISAMAX .. */ i__1 = isamax_(&combla_1.n, sx, &combla_1.incx); itest1_(&i__1, &itrue2[np1 - 1]); } else { s_wsle(&io___36); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L60: */ } /* L80: */ } return 0; } /* check1_ */ /* Subroutine */ int check2_(real *sfac) { /* Initialized data */ static real sa = .3f; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static real dx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real dy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static real dt7[16] /* was [4][4] */ = { 0.f,.3f,.21f,.62f,0.f,.3f,-.07f, .85f,0.f,.3f,-.79f,-.74f,0.f,.3f,.33f,1.27f }; static real st7b[16] /* was [4][4] */ = { .1f,.4f,.31f,.72f,.1f, .4f,.03f,.95f,.1f,.4f,-.69f,-.64f,.1f,.4f,.43f,1.37f }; static real dt8[112] /* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f, 0.f,0.f,.68f,0.f,0.f,0.f,0.f,0.f,0.f,.68f,-.87f,0.f,0.f,0.f,0.f, 0.f,.68f,-.87f,.15f,.94f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f, .68f,0.f,0.f,0.f,0.f,0.f,0.f,.35f,-.9f,.48f,0.f,0.f,0.f,0.f,.38f, -.9f,.57f,.7f,-.75f,.2f,.98f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.68f,0.f, 0.f,0.f,0.f,0.f,0.f,.35f,-.72f,0.f,0.f,0.f,0.f,0.f,.38f,-.63f, .15f,.88f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.68f,0.f,0.f, 0.f,0.f,0.f,0.f,.68f,-.9f,.33f,0.f,0.f,0.f,0.f,.68f,-.9f,.33f,.7f, -.75f,.2f,1.04f }; static real dt10x[112] /* was [7][4][4] */ = { .6f,0.f,0.f,0.f,0.f, 0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,-.9f,0.f,0.f,0.f,0.f,0.f, .5f,-.9f,.3f,.7f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,0.f, 0.f,0.f,0.f,0.f,0.f,.3f,.1f,.5f,0.f,0.f,0.f,0.f,.8f,.1f,-.6f,.8f, .3f,-.3f,.5f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f, 0.f,-.9f,.1f,.5f,0.f,0.f,0.f,0.f,.7f,.1f,.3f,.8f,-.9f,-.3f,.5f, .6f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,.3f, 0.f,0.f,0.f,0.f,0.f,.5f,.3f,-.6f,.8f,0.f,0.f,0.f }; static real dt10y[112] /* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f, 0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,.1f,0.f,0.f,0.f,0.f,0.f, .6f,.1f,-.5f,.8f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,0.f, 0.f,0.f,0.f,0.f,0.f,-.5f,-.9f,.6f,0.f,0.f,0.f,0.f,-.4f,-.9f,.9f, .7f,-.5f,.2f,.6f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f, 0.f,0.f,-.5f,.6f,0.f,0.f,0.f,0.f,0.f,-.4f,.9f,-.5f,.6f,0.f,0.f, 0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.6f, -.9f,.1f,0.f,0.f,0.f,0.f,.6f,-.9f,.1f,.7f,-.5f,.2f,.8f }; static real ssize1[4] = { 0.f,.3f,1.6f,3.2f }; static real ssize2[28] /* was [14][2] */ = { 0.f,0.f,0.f,0.f,0.f,0.f, 0.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,1.17f,1.17f,1.17f,1.17f,1.17f, 1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f }; static real ssize3[4] = { .1f,.4f,1.7f,3.3f }; static real dpar[20] /* was [5][4] */ = { -2.f,0.f,0.f,0.f,0.f, -1.f,2.f,-3.f,-4.f,5.f,0.f,0.f,2.f,-3.f,0.f,1.f,5.f,2.f,0.f,-4.f } ; static struct { real e_1[448]; } equiv_3 = {{ .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 3.8f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 2.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, -.4f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, 3.8f, -2.2f, -1.2f, 0.f, 0.f, 0.f, -.9f, 2.8f, -1.4f, -1.3f, 0.f, 0.f, 0.f, 3.5f, -.4f, -2.2f, 4.7f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, 0.f, 0.f, .1f, -3.f, 0.f, 0.f, 0.f, 0.f, -.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, .9f, -.3f, -.4f, -2.f, .1f, 1.4f, .8f, .6f, -.3f, -2.8f, -1.8f, .1f, 1.3f, .8f, 0.f, -.3f, -1.9f, 3.8f, .1f, -3.1f, .8f, 4.8f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, 0.f, 4.8f, .1f, -3.f, 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, 2.1f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, .9f, -.3f, -.4f, -1.6f, .1f, -2.2f, .8f, 5.4f, -.3f, -2.8f, -1.5f, .1f, -1.4f, .8f, 3.6f, -.3f, -1.9f, 3.7f, .1f, -2.2f, .8f, 3.6f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, -1.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, .8f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, -1.f, 1.4f, -1.6f, 0.f, 0.f, 0.f, -.9f, -.8f, 1.3f, -1.6f, 0.f, 0.f, 0.f, 3.5f, .8f, -3.1f, 4.8f, 0.f, 0.f, 0.f }}; static struct { real e_1[448]; } equiv_7 = {{ .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, -4.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, -.7f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, 0.f, 0.f, 0.f, .7f, -4.8f, 3.f, 1.1f, 0.f, 0.f, 0.f, 1.7f, -.7f, -.7f, 2.3f, 0.f, 0.f, 0.f, -2.6f, 3.5f, -.7f, -3.6f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, 4.f, -.9f, -.3f, 0.f, 0.f, 0.f, 0.f, -.5f, -.9f, 1.5f, 0.f, 0.f, 0.f, 0.f, -1.5f, -.9f, -1.8f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, .8f, 3.7f, -.9f, -1.2f, .7f, -1.5f, .2f, 2.2f, -.3f, -.9f, 2.1f, .7f, -1.6f, .2f, 2.f, -1.6f, -.9f, -2.1f, .7f, 2.9f, .2f, -3.8f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 4.f, -6.3f, 0.f, 0.f, 0.f, 0.f, 0.f, -.5f, .3f, 0.f, 0.f, 0.f, 0.f, 0.f, -1.5f, 3.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, 0.f, 0.f, 0.f, 3.7f, -7.2f, 3.f, 1.7f, 0.f, 0.f, 0.f, -.3f, .9f, -.7f, 1.9f, 0.f, 0.f, 0.f, -1.6f, 2.7f, -.7f, -3.4f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, .7f, -.9f, 1.2f, 0.f, 0.f, 0.f, 0.f, 1.7f, -.9f, .5f, 0.f, 0.f, 0.f, 0.f, -2.6f, -.9f, -1.3f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, .8f, .7f, -.9f, 1.2f, .7f, -1.5f, .2f, 1.6f, 1.7f, -.9f, .5f, .7f, -1.6f, .2f, 2.4f, -2.6f, -.9f, -1.3f, .7f, 2.9f, .2f, -4.f }}; /* System generated locals */ integer i__1; real r__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, j, ki, kn, mx, my; real sx[7], sy[7]; integer kni; real stx[7], sty[7]; integer kpar, lenx, leny; #define dt19x ((real *)&equiv_3) #define dt19y ((real *)&equiv_7) extern real sdot_(integer *, real *, integer *, real *, integer *); real dtemp[5]; #define dt19xa ((real *)&equiv_3) #define dt19xb ((real *)&equiv_3 + 112) #define dt19xc ((real *)&equiv_3 + 224) #define dt19xd ((real *)&equiv_3 + 336) #define dt19ya ((real *)&equiv_7) #define dt19yb ((real *)&equiv_7 + 112) #define dt19yc ((real *)&equiv_7 + 224) #define dt19yd ((real *)&equiv_7 + 336) integer ksize; real ssize[7]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), stest_(integer *, real *, real *, real *, real *), saxpy_( integer *, real *, real *, integer *, real *, integer *), srotm_( integer *, real *, integer *, real *, integer *, real *), stest1_( real *, real *, real *, real *); extern real sdsdot_(integer *, real *, real *, integer *, real *, integer *); /* Fortran I/O blocks */ static cilist io___80 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* FOR DROTM */ /* TRUE X RESULTS F0R ROTATIONS DROTM */ /* TRUE Y RESULTS FOR ROTATIONS DROTM */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. Initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; /* L20: */ } if (combla_1.icase == 1) { /* .. SDOT .. */ r__1 = sdot_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy); stest1_(&r__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. SAXPY .. */ saxpy_(&combla_1.n, &sa, sx, &combla_1.incx, sy, & combla_1.incy); i__1 = leny; for (j = 1; j <= i__1; ++j) { sty[j - 1] = dt8[j + (kn + (ki << 2)) * 7 - 36]; /* L40: */ } stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else if (combla_1.icase == 5) { /* .. SCOPY .. */ for (i__ = 1; i__ <= 7; ++i__) { sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L60: */ } scopy_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 6) { /* .. SSWAP .. */ sswap_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); for (i__ = 1; i__ <= 7; ++i__) { stx[i__ - 1] = dt10x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L80: */ } stest_(&lenx, sx, stx, ssize2, &c_b35); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 12) { /* .. SROTM .. */ kni = kn + (ki - 1 << 2); for (kpar = 1; kpar <= 4; ++kpar) { for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - 36]; sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - 36]; } for (i__ = 1; i__ <= 5; ++i__) { dtemp[i__ - 1] = dpar[i__ + kpar * 5 - 6]; } i__1 = lenx; for (i__ = 1; i__ <= i__1; ++i__) { ssize[i__ - 1] = stx[i__ - 1]; } /* SEE REMARK ABOVE ABOUT DT11X(1,2,7) */ /* AND DT11X(5,3,8). */ if (kpar == 2 && kni == 7) { ssize[0] = 2.4f; } if (kpar == 3 && kni == 8) { ssize[4] = 1.8f; } srotm_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy, dtemp); stest_(&lenx, sx, stx, ssize, sfac); stest_(&leny, sy, sty, sty, sfac); } } else if (combla_1.icase == 13) { /* .. SDSROT .. */ r__1 = sdsdot_(&combla_1.n, &c_b39, sx, &combla_1.incx, sy, & combla_1.incy); stest1_(&r__1, &st7b[kn + (ki << 2) - 5], &ssize3[kn - 1], sfac); } else { s_wsle(&io___80); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L100: */ } /* L120: */ } return 0; } /* check2_ */ #undef dt19yd #undef dt19yc #undef dt19yb #undef dt19ya #undef dt19xd #undef dt19xc #undef dt19xb #undef dt19xa #undef dt19y #undef dt19x /* Subroutine */ int check3_(real *sfac) { /* Initialized data */ static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static real dx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real dy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static real sc = .8f; static real ss = .6f; static real dt9x[112] /* was [7][4][4] */ = { .6f,0.f,0.f,0.f,0.f, 0.f,0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.78f,-.46f,0.f,0.f,0.f,0.f, 0.f,.78f,-.46f,-.22f,1.06f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f, 0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.66f,.1f,-.1f,0.f,0.f,0.f,0.f, .96f,.1f,-.76f,.8f,.9f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f, .78f,0.f,0.f,0.f,0.f,0.f,0.f,-.06f,.1f,-.1f,0.f,0.f,0.f,0.f,.9f, .1f,-.22f,.8f,.18f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.78f, 0.f,0.f,0.f,0.f,0.f,0.f,.78f,.26f,0.f,0.f,0.f,0.f,0.f,.78f,.26f, -.76f,1.12f,0.f,0.f,0.f }; static real dt9y[112] /* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f, 0.f,0.f,.04f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,-.78f,0.f,0.f,0.f,0.f, 0.f,.04f,-.78f,.54f,.08f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f, .04f,0.f,0.f,0.f,0.f,0.f,0.f,.7f,-.9f,-.12f,0.f,0.f,0.f,0.f,.64f, -.9f,-.3f,.7f,-.18f,.2f,.28f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f, 0.f,0.f,0.f,0.f,0.f,.7f,-1.08f,0.f,0.f,0.f,0.f,0.f,.64f,-1.26f, .54f,.2f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f,0.f,0.f, 0.f,0.f,0.f,.04f,-.9f,.18f,0.f,0.f,0.f,0.f,.04f,-.9f,.18f,.7f, -.18f,.2f,.16f }; static real ssize2[28] /* was [14][2] */ = { 0.f,0.f,0.f,0.f,0.f,0.f, 0.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,1.17f,1.17f,1.17f,1.17f,1.17f, 1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k, ki, kn, mx, my; real sx[7], sy[7], stx[7], sty[7]; integer lenx, leny; real mwpc[11]; integer mwpn[11]; real mwps[11]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); real mwpx[5], mwpy[5]; integer ksize; real copyx[5], copyy[5]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); real mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; integer mwpinx[11], mwpiny[11]; real mwpstx[5], mwpsty[5]; /* Fortran I/O blocks */ static cilist io___104 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; if (combla_1.icase == 4) { /* .. SROT .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt9x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt9y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L20: */ } srot_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy, & sc, &ss); stest_(&lenx, sx, stx, &ssize2[ksize * 14 - 14], sfac); stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else { s_wsle(&io___104); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK3", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } mwpc[0] = 1.f; for (i__ = 2; i__ <= 11; ++i__) { mwpc[i__ - 1] = 0.f; /* L80: */ } mwps[0] = 0.f; for (i__ = 2; i__ <= 6; ++i__) { mwps[i__ - 1] = 1.f; /* L100: */ } for (i__ = 7; i__ <= 11; ++i__) { mwps[i__ - 1] = -1.f; /* L120: */ } mwpinx[0] = 1; mwpinx[1] = 1; mwpinx[2] = 1; mwpinx[3] = -1; mwpinx[4] = 1; mwpinx[5] = -1; mwpinx[6] = 1; mwpinx[7] = 1; mwpinx[8] = -1; mwpinx[9] = 1; mwpinx[10] = -1; mwpiny[0] = 1; mwpiny[1] = 1; mwpiny[2] = -1; mwpiny[3] = -1; mwpiny[4] = 2; mwpiny[5] = 1; mwpiny[6] = 1; mwpiny[7] = -1; mwpiny[8] = -1; mwpiny[9] = 2; mwpiny[10] = 1; for (i__ = 1; i__ <= 11; ++i__) { mwpn[i__ - 1] = 5; /* L140: */ } mwpn[4] = 3; mwpn[9] = 3; for (i__ = 1; i__ <= 5; ++i__) { mwpx[i__ - 1] = (real) i__; mwpy[i__ - 1] = (real) i__; mwptx[i__ * 11 - 11] = (real) i__; mwpty[i__ * 11 - 11] = (real) i__; mwptx[i__ * 11 - 10] = (real) i__; mwpty[i__ * 11 - 10] = (real) (-i__); mwptx[i__ * 11 - 9] = (real) (6 - i__); mwpty[i__ * 11 - 9] = (real) (i__ - 6); mwptx[i__ * 11 - 8] = (real) i__; mwpty[i__ * 11 - 8] = (real) (-i__); mwptx[i__ * 11 - 6] = (real) (6 - i__); mwpty[i__ * 11 - 6] = (real) (i__ - 6); mwptx[i__ * 11 - 5] = (real) (-i__); mwpty[i__ * 11 - 5] = (real) i__; mwptx[i__ * 11 - 4] = (real) (i__ - 6); mwpty[i__ * 11 - 4] = (real) (6 - i__); mwptx[i__ * 11 - 3] = (real) (-i__); mwpty[i__ * 11 - 3] = (real) i__; mwptx[i__ * 11 - 1] = (real) (i__ - 6); mwpty[i__ * 11 - 1] = (real) (6 - i__); /* L160: */ } mwptx[4] = 1.f; mwptx[15] = 3.f; mwptx[26] = 5.f; mwptx[37] = 4.f; mwptx[48] = 5.f; mwpty[4] = -1.f; mwpty[15] = 2.f; mwpty[26] = -2.f; mwpty[37] = 4.f; mwpty[48] = -3.f; mwptx[9] = -1.f; mwptx[20] = -3.f; mwptx[31] = -5.f; mwptx[42] = 4.f; mwptx[53] = 5.f; mwpty[9] = 1.f; mwpty[20] = 2.f; mwpty[31] = 2.f; mwpty[42] = 4.f; mwpty[53] = 3.f; for (i__ = 1; i__ <= 11; ++i__) { combla_1.incx = mwpinx[i__ - 1]; combla_1.incy = mwpiny[i__ - 1]; for (k = 1; k <= 5; ++k) { copyx[k - 1] = mwpx[k - 1]; copyy[k - 1] = mwpy[k - 1]; mwpstx[k - 1] = mwptx[i__ + k * 11 - 12]; mwpsty[k - 1] = mwpty[i__ + k * 11 - 12]; /* L180: */ } srot_(&mwpn[i__ - 1], copyx, &combla_1.incx, copyy, &combla_1.incy, & mwpc[i__ - 1], &mwps[i__ - 1]); stest_(&c__5, copyx, mwpstx, mwpstx, sfac); stest_(&c__5, copyy, mwpsty, mwpsty, sfac); /* L200: */ } return 0; } /* check3_ */ /* Subroutine */ int stest_(integer *len, real *scomp, real *strue, real * ssize, real *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY I " " \002,\002 COMP(I) TRUE(I) " " DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,2i5,i3,2e36.8,2e12.4)"; /* System generated locals */ integer i__1; real r__1, r__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; real sd; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___121 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___122 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___123 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((r__2 = *sfac * sd, abs(r__2)) <= (r__1 = ssize[i__], abs(r__1)) * s_epsilon_(&c_b63)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___121); e_wsfe(); s_wsfe(&io___122); e_wsfe(); L20: s_wsfe(&io___123); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(real)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int stest1_(real *scomp1, real *strue1, real *ssize, real * sfac) { real scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ real sdiff_(real *sa, real *sb) { /* System generated locals */ real ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY " " \002,\002 COMP TRUE " " DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,2i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___126 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___127 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___129 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___126); e_wsfe(); s_wsfe(&io___127); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___129); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int sblat1_ () { main (); return 0; } blis-1.1/blastest/src/sblat2.c000066400000000000000000004457361474157777200163170ustar00rootroot00000000000000/* sblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static real c_b120 = 0.f; static real c_b128 = 1.f; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; /* > \brief \b SBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM SBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the REAL Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 16 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 34 lines: */ /* > 'sblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 0.9 VALUES OF BETA */ /* > SGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STBSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SGER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup single_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "sblat2"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*16] = "SGEMV " "SGBMV " "SSYMV " "SSBMV " "SSPMV " "STRMV " "STBMV " "STPMV " "STRSV " "STBSV " "STPSV " "SGER " "SSYR " "SSPR " "SSYR2 " "SSPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE REAL LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9988[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9985[] = "(\002 ERROR IN SMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 SMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ real a[4225] /* was [65][65] */, g[65]; integer i__, j, n; real x[65], y[65], z__[130], aa[4225]; integer kb[7]; real as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7]; integer inc[7], nkb; real bet[7]; extern logical lse_(real *, real *, integer *); real eps, err; integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real * , real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, real *, real *, real * , real *, real *, real *, real *, real *, real *, ftnlen), schk4_( char *, real *, real *, integer *, integer *, logical *, logical * , logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real * , real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk6_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen); logical fatal; extern /* Subroutine */ int schke_(integer *, char *, integer *, ftnlen); logical trace; integer nidim; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; logical ltest[16], sfatal; char snamet[6]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 16; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 16; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b120); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of SMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; a[i__ + j * 65 - 66] = (real) max(i__3,0); /* L110: */ } x[j - 1] = (real) j; y[j - 1] = 0.f; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { yy[j - 1] = (real) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3) ; /* L130: */ } /* YY holds the exact result. On exit from SMVCH YT holds */ /* the result computed by SMVCH. */ *(unsigned char *)trans = 'N'; smvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c__1, &c_b120, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lse_(yy, yt, &n); if (! same || err != 0.f) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; smvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c_n1, &c_b120, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lse_(yy, yt, &n); if (! same || err != 0.f) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 16; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { schke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L180; case 14: goto L180; case 15: goto L190; case 16: goto L190; } /* Test SGEMV, 01, and SGBMV, 02. */ L140: schk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test SSYMV, 03, SSBMV, 04, and SSPMV, 05. */ L150: schk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test STRMV, 06, STBMV, 07, STPMV, 08, */ /* STRSV, 09, STBSV, 10, and STPSV, 11. */ L160: schk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test SGER, 12. */ L170: schk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test SSYR, 13, and SSPR, 14. */ L180: schk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test SSYR2, 15, and SSPR2, 16. */ L190: schk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of SBLAT2. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int schk1_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, real *alf, integer *nbet, real *bet, integer *ninc, integer * inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns, laa, lda; real als, bls; extern logical lse_(real *, real *, integer *); real err; integer iku, kls, kus; real beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int sgbmv_(char *, integer *, integer *, integer * , integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen), smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen), sgemv_(char *, integer *, integer * , real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); logical reset; integer incxs, incys; char trans[1]; logical banded; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests SGEMV and SGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. */ transl = 0.f; smake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. */ transl = .5f; i__4 = abs(incx); i__5 = nl - 1; smake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { x[nl / 2] = 0.f; xx[abs(incx) * (nl / 2 - 1) + 1] = 0.f; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { alpha = alf[ia]; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { beta = bet[ib]; /* Generate the vector Y. */ transl = 0.f; i__7 = abs(incy); i__8 = ml - 1; smake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als = alpha; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; bls = beta; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als == alpha; isame[4] = lse_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lse_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls == beta; if (null) { isame[9] = lse_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lseres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als == alpha; isame[6] = lse_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lse_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls == beta; if (null) { isame[11] = lse_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lseres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. */ smvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L140; L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of SCHK1. */ } /* schk1_ */ /* Subroutine */ int schk2_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, real *alf, integer *nbet, real *bet, integer *ninc, integer * inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f4.1," "\002, Y,\002,i2,\002) .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, AP\002,\002, X,\002,i2,\002,\002,f4.1" ",\002, Y,\002,i2,\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; real als, bls; extern logical lse_(real *, real *, integer *); real err, beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; extern /* Subroutine */ int ssbmv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); char uplos[1]; extern /* Subroutine */ int sspmv_(char *, integer *, real *, real *, real *, integer *, real *, real *, integer *, ftnlen), ssymv_( char *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); logical banded, packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests SSYMV, SSBMV and SSPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. */ transl = 0.f; smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5f; i__4 = abs(incx); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { alpha = alf[ia]; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { beta = bet[ib]; /* Generate the vector Y. */ transl = 0.f; i__7 = abs(incy); i__8 = n - 1; smake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als = alpha; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; bls = beta; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssymv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssbmv_(uplo, &n, &k, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sspmv_(uplo, &n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als == alpha; isame[3] = lse_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lse_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls == beta; if (null) { isame[8] = lse_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lseres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als == alpha; isame[4] = lse_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lse_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls == beta; if (null) { isame[9] = lse_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lseres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als == alpha; isame[3] = lse_(&as[1], &aa[1], &laa); isame[4] = lse_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls == beta; if (null) { isame[7] = lse_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lseres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ smvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of SCHK2. */ } /* schk2_ */ /* Subroutine */ int schk3_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *xt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, icu; extern logical lse_(real *, real *, integer *); real err; char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1], diags[1]; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs; char trans[1]; extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strmv_(char *, char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stpsv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strsv_(char * , char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.f; /* Set up zero vector for SMVCH. */ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { z__[i__] = 0.f; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl = 0.f; smake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5f; i__4 = abs(incx); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L30: */ } incxs = incx; /* Call the subroutine. */ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } strmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } strsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lse_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lse_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lseres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lse_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lseres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lse_(&as[1], &aa[1], &laa); if (null) { isame[5] = lse_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lseres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. */ smvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &x[1], &incx, & c_b120, &z__[1], &incx, &xt[1], & g[1], &xx[1], eps, &err, fatal, nout, &c_true, (ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. */ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = xx[(i__ - 1) * abs(incx) + 1]; xx[(i__ - 1) * abs(incx) + 1] = x[i__] ; /* L50: */ } smvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &z__[1], &incx, & c_b120, &x[1], &incx, &xt[1], &g[ 1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of SCHK3. */ } /* schk3_ */ /* Subroutine */ int schk4_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002,\002)" ",f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n; real w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; real als; extern logical lse_(real *, real *, integer *); real err; integer ldas; logical same; extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); integer incx, incy; logical null; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks */ static cilist io___284 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___285 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___288 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___292 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SGER. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Define the number of arguments. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. */ transl = .5f; i__3 = abs(incx); i__4 = m - 1; smake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { x[m / 2] = 0.f; xx[abs(incx) * (m / 2 - 1) + 1] = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl = 0.f; i__4 = abs(incy); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.f; yy[abs(incy) * (n / 2 - 1) + 1] = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; /* Generate the matrix A. */ transl = 0.f; i__5 = m - 1; i__6 = n - 1; smake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ ms = m; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (*trace) { io___284.ciunit = *ntra; s_wsfe(&io___284); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sger_(&m, &n, &alpha, &xx[1], &incx, &yy[1], &incy, & aa[1], &lda); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___285.ciunit = *nout; s_wsfe(&io___285); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lse_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lse_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lse_(&as[1], &aa[1], &laa); } else { isame[7] = lseres_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[i__]; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[m - i__ + 1]; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { w[0] = y[j]; } else { w[0] = y[n - j + 1]; } smvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b128, &a[j * a_dim1 + 1], &c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___292.ciunit = *nout; s_wsfe(&io___292); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L150; L130: io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of SCHK4. */ } /* schk4_ */ /* Subroutine */ int schk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, n; real w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; real als; extern logical lse_(real *, real *, integer *); real err; integer ldas; logical same; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int sspr_(char *, integer *, real *, real *, integer *, real *, ftnlen), ssyr_(char *, integer *, real *, real *, integer *, real *, integer *, ftnlen); real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs; logical upper; char uplos[1]; logical packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks */ static cilist io___324 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___325 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___326 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___329 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___337 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYR and SSPR. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5f; i__3 = abs(incx); i__4 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; null = n <= 0 || alpha == 0.f; /* Generate the matrix A. */ transl = 0.f; i__4 = n - 1; i__5 = n - 1; smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als = alpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; /* Call the subroutine. */ if (full) { if (*trace) { io___324.ciunit = *ntra; s_wsfe(&io___324); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], &lda, ( ftnlen)1); } else if (packed) { if (*trace) { io___325.ciunit = *ntra; s_wsfe(&io___325); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sspr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___326.ciunit = *nout; s_wsfe(&io___326); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lse_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lse_(&as[1], &aa[1], &laa); } else { isame[5] = lseres_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___329.ciunit = *nout; s_wsfe(&io___329); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = x[i__]; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = x[n - i__ + 1]; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { w[0] = z__[j]; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } smvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b128, &a[jj + j * a_dim1], & c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___336.ciunit = *nout; s_wsfe(&io___336); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___337.ciunit = *nout; s_wsfe(&io___337); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of SCHK5. */ } /* schk5_ */ /* Subroutine */ int schk6_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i" "3,\002) .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, n; real w[2]; integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda; real als; extern logical lse_(real *, real *, integer *); real err; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, ftnlen), ssyr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, integer *, ftnlen); real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; logical upper; char uplos[1]; logical packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks */ static cilist io___373 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___374 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___375 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___378 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___386 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYR2 and SSPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5f; i__3 = abs(incx); i__4 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl = 0.f; i__4 = abs(incy); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.f; yy[abs(incy) * (n / 2 - 1) + 1] = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; null = n <= 0 || alpha == 0.f; /* Generate the matrix A. */ transl = 0.f; i__5 = n - 1; i__6 = n - 1; smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___373.ciunit = *ntra; s_wsfe(&io___373); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___374.ciunit = *ntra; s_wsfe(&io___374); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sspr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___375.ciunit = *nout; s_wsfe(&io___375); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lse_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lse_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lse_(&as[1], &aa[1], &laa); } else { isame[7] = lseres_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___378.ciunit = *nout; s_wsfe(&io___378); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + z_dim1] = x[i__]; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + z_dim1] = x[n - i__ + 1]; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + (z_dim1 << 1)] = y[i__]; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + (z_dim1 << 1)] = y[n - i__ + 1]; /* L80: */ } } ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { w[0] = z__[j + (z_dim1 << 1)]; w[1] = z__[j + z_dim1]; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } smvch_("N", &lj, &c__2, &alpha, &z__[jj + z_dim1], nmax, w, &c__1, &c_b128, &a[ jj + j * a_dim1], &c__1, &yt[1], &g[1] , &aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. */ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. */ if (errmax < *thresh) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___386.ciunit = *nout; s_wsfe(&io___386); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L170; L150: io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of SCHK6. */ } /* schk6_ */ /* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ real a[1] /* was [1][1] */, x[1], y[1], beta; extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *), sspr_(char *, integer *, real *, real *, integer *, real *, ftnlen), ssyr_(char *, integer *, real *, real *, integer *, real *, integer *, ftnlen), sspr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, ftnlen), ssyr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, integer *, ftnlen); real alpha; extern /* Subroutine */ int sgbmv_(char *, integer *, integer *, integer * , integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen), sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen), ssbmv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen), stbmv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), sspmv_( char *, integer *, real *, real *, real *, integer *, real *, real *, integer *, ftnlen), stpmv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strmv_(char *, char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stpsv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), ssymv_(char *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen), strsv_( char *, char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___396 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___397 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; } L10: infoc_1.infot = 1; sgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; sgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; sgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L20: infoc_1.infot = 1; sgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L30: infoc_1.infot = 1; ssymv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssymv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ssymv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssymv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L40: infoc_1.infot = 1; ssbmv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ssbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ssbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ssbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L50: infoc_1.infot = 1; sspmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sspmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; sspmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; sspmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L60: infoc_1.infot = 1; strmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; strmv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; strmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; strmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; strmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L70: infoc_1.infot = 1; stbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; stbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; stbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; stbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; stbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; stbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; stbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L80: infoc_1.infot = 1; stpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; stpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; stpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; stpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; stpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L90: infoc_1.infot = 1; strsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; strsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; strsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; strsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; strsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L100: infoc_1.infot = 1; stbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; stbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; stbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; stbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; stbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; stbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; stbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L110: infoc_1.infot = 1; stpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; stpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; stpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; stpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; stpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L120: infoc_1.infot = 1; sger_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sger_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sger_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; sger_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; sger_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L130: infoc_1.infot = 1; ssyr_("/", &c__0, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssyr_("U", &c_n1, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ssyr_("U", &c__0, &alpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr_("U", &c__2, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L140: infoc_1.infot = 1; sspr_("/", &c__0, &alpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sspr_("U", &c_n1, &alpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sspr_("U", &c__0, &alpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L150: infoc_1.infot = 1; ssyr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssyr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ssyr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L160: infoc_1.infot = 1; sspr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sspr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sspr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; sspr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L170: if (infoc_1.ok) { io___396.ciunit = *nout; s_wsfe(&io___396); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___397.ciunit = *nout; s_wsfe(&io___397); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of SCHKE. */ } /* schke_ */ /* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, integer *n, real *a, integer *nmax, real *aa, integer *lda, integer * kl, integer *ku, logical *reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, kk; logical gen, tri, sym; integer ibeg, iend; extern real sbeg_(logical *); integer ioff; logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'S'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { a[i__ + j * a_dim1] = sbeg_(reset) + *transl; } else { a[i__ + j * a_dim1] = 0.f; } if (i__ != j) { if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.f; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.f; } if (unit) { a[j + j * a_dim1] = 1.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { aa[i1 + (j - 1) * *lda] = -1e10f; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { aa[i2 + (j - 1) * *lda] = a[i2 + j - *ku - 1 + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { aa[i3 + (j - 1) * *lda] = -1e10f; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L120: */ } /* L130: */ } } else if (s_cmp(type__, "SB", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j - kk + j * a_dim1]; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L160: */ } /* L170: */ } } else if (s_cmp(type__, "SP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; aa[ioff] = a[i__ + j * a_dim1]; if (i__ == j) { if (unit) { aa[ioff] = -1e10f; } } /* L180: */ } /* L190: */ } } return 0; /* End of SMAKE. */ } /* smake_ */ /* Subroutine */ int smvch_(char *trans, integer *m, integer *n, real *alpha, real *a, integer *nmax, real *x, integer *incx, real *beta, real *y, integer *incy, real *yt, real *g, real *yy, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; real erri; logical tran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___425 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___426 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___427 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; --yt; --g; --yy; /* Function Body */ tran = *(unsigned char *)trans == 'T' || *(unsigned char *)trans == 'C'; if (tran) { ml = *n; nl = *m; } else { ml = *m; nl = *n; } if (*incx < 0) { kx = nl; incxl = -1; } else { kx = 1; incxl = 1; } if (*incy < 0) { ky = ml; incyl = -1; } else { ky = 1; incyl = 1; } /* Compute expected result in YT using data in A, X and Y. */ /* Compute gauges in G. */ iy = ky; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { yt[iy] = 0.f; g[iy] = 0.f; jx = kx; if (tran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[j + i__ * a_dim1] * x[jx]; g[iy] += (r__1 = a[j + i__ * a_dim1] * x[jx], abs(r__1)); jx += incxl; /* L10: */ } } else { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[i__ + j * a_dim1] * x[jx]; g[iy] += (r__1 = a[i__ + j * a_dim1] * x[jx], abs(r__1)); jx += incxl; /* L20: */ } } yt[iy] = *alpha * yt[iy] + *beta * y[iy]; g[iy] = abs(*alpha) * g[iy] + (r__1 = *beta * y[iy], abs(r__1)); iy += incyl; /* L30: */ } /* Compute the error ratio for this result. */ *err = 0.f; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { erri = (r__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(r__1)) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L50; } /* L40: */ } /* If the loop completes, all results are at least half accurate. */ goto L70; /* Report fatal error. */ L50: *fatal = TRUE_; io___425.ciunit = *nout; s_wsfe(&io___425); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___426.ciunit = *nout; s_wsfe(&io___426); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(real)); e_wsfe(); } else { io___427.ciunit = *nout; s_wsfe(&io___427); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L60: */ } L70: return 0; /* End of SMVCH. */ } /* smvch_ */ logical lse_(real *ri, real *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LSE. */ } /* lse_ */ logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, real *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'SY' or 'SP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LSERES. */ } /* lseres_ */ real sbeg_(logical *reset) { /* System generated locals */ real ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. */ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (real) (i__ - 500) / 1001.f; return ret_val; /* End of SBEG. */ } /* sbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. */ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___437 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___437.ciunit = *nout; s_wsfe(&io___437); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___438 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___439 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___440 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___438.ciunit = infoc_2.nout; s_wsfe(&io___438); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___439.ciunit = infoc_2.nout; s_wsfe(&io___439); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___440.ciunit = infoc_2.nout; s_wsfe(&io___440); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int sblat2_ () { main (); return 0; } blis-1.1/blastest/src/sblat3.c000066400000000000000000004312041474157777200163010ustar00rootroot00000000000000/* sblat3.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static real c_b84 = 0.f; static real c_b94 = 1.f; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; static integer c__2 = 2; /* > \brief \b SBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM SBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the REAL Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 6 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 20 lines: */ /* > 'sblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 1.3 VALUES OF BETA */ /* > SGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup single_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "sblat3"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*6] = "SGEMM " "SSYMM " "STRMM " "STRSM " "SSYRK " "SSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE REAL LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9992[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9989[] = "(\002 ERROR IN SMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 SMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ real c__[4225] /* was [65][65] */, g[65]; integer i__, j, n; real w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[ 4225], as[4225], bs[4225], cs[4225], ct[65], alf[7], bet[7]; extern logical lse_(real *, real *, integer *); real eps, err; integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk4_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen); logical fatal; extern /* Subroutine */ int schke_(integer *, char *, integer *, ftnlen); logical trace; integer nidim; extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); char snaps[32]; integer isnum; logical ltest[6], sfatal; char snamet[6], transa[1], transb[1]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Report values of parameters. */ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 6; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 6; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b84); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of SMMCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; ab[i__ + j * 65 - 66] = (real) max(i__3,0); /* L90: */ } ab[j + 4224] = (real) j; ab[(j + 65) * 65 - 65] = (real) j; c__[j - 1] = 0.f; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { cc[j - 1] = (real) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3) ; /* L110: */ } /* CC holds the exact result. On exit from SMMCH CT holds */ /* the result computed by SMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'T'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { ab[j + 4224] = (real) (n - j + 1); ab[(j + 65) * 65 - 65] = (real) (n - j + 1); /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { cc[n - j] = (real) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3) ; /* L130: */ } *(unsigned char *)transa = 'T'; *(unsigned char *)transb = 'N'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'T'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 6; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { schke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L160; case 4: goto L160; case 5: goto L170; case 6: goto L180; } /* Test SGEMM, 01. */ L140: schk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test SSYMM, 02. */ L150: schk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test STRMM, 03, STRSM, 04. */ L160: schk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test SSYRK, 05. */ L170: schk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test SSYR2K, 06. */ L180: schk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; L190: if (fatal && sfatal) { goto L210; } } /* L200: */ } io___78.ciunit = nout; s_wsfe(&io___78); e_wsfe(); goto L230; L210: io___79.ciunit = nout; s_wsfe(&io___79); e_wsfe(); goto L230; L220: io___80.ciunit = nout; s_wsfe(&io___80); e_wsfe(); L230: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of SBLAT3. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int schk1_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002" ",i3,\002,\002,f4.1,\002, \002,\002C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; real als, bls; extern logical lse_(real *, real *, integer *); real err, beta; integer ldas, ldbs, ldcs; logical same, null; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen); logical trana, tranb; extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen), sgemm_( char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); integer nargs; logical reset; char tranas[1], tranbs[1], transa[1], transb[1]; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests SGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ smake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ smake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b84, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ smake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als = alpha; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bls = beta; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als == alpha; isame[6] = lse_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lse_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls == beta; if (null) { isame[11] = lse_(&cs[1], &cc[1], &lcc); } else { isame[11] = lseres_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ smmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result. */ if (errmax < *thresh) { io___130.ciunit = *nout; s_wsfe(&io___130); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___131.ciunit = *nout; s_wsfe(&io___131); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___132.ciunit = *nout; s_wsfe(&io___132); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___133.ciunit = *nout; s_wsfe(&io___133); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of SCHK1. */ } /* schk1_ */ /* Subroutine */ int schk2_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char ichs[2] = "LR"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc, ics; real als, bls; integer icu; extern logical lse_(real *, real *, integer *); real err, beta; integer ldas, ldbs, ldcs; logical same; char side[1]; logical left, null; char uplo[1]; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen); char sides[1]; extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char uplos[1]; extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___171 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___172 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___175 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___177 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___178 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___179 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___180 = { 0, 0, 0, fmt_9995, 0 }; /* Tests SSYMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 12; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L90; } lcc = ldc * n; null = n <= 0 || m <= 0; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L90; } lbb = ldb * n; /* Generate the matrix B. */ smake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, & reset, &c_b84, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; /* Generate the symmetric matrix A. */ smake_("SY", uplo, " ", &na, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ smake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *)side; *(unsigned char *)uplos = *(unsigned char *)uplo; ms = m; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bls = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___171.ciunit = *ntra; s_wsfe(&io___171); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssymm_(side, uplo, &m, &n, &alpha, &aa[1], &lda, & bb[1], &ldb, &beta, &cc[1], &ldc, (ftnlen) 1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___172.ciunit = *nout; s_wsfe(&io___172); e_wsfe(); *fatal = TRUE_; goto L110; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *(unsigned char *)side; isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; isame[4] = als == alpha; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lse_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bls == beta; if (null) { isame[10] = lse_(&cs[1], &cc[1], &lcc); } else { isame[10] = lseres_("GE", " ", &m, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___175.ciunit = *nout; s_wsfe(&io___175); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L110; } if (! null) { /* Check the result. */ if (left) { smmch_("N", "N", &m, &n, &m, &alpha, &a[ a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { smmch_("N", "N", &m, &n, &n, &alpha, &b[ b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } L90: ; } /* L100: */ } /* Report result. */ if (errmax < *thresh) { io___177.ciunit = *nout; s_wsfe(&io___177); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___178.ciunit = *nout; s_wsfe(&io___178); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L120; L110: io___179.ciunit = *nout; s_wsfe(&io___179); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___180.ciunit = *nout; s_wsfe(&io___180); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L120: return 0; /* End of SCHK2. */ } /* schk2_ */ /* Subroutine */ int schk3_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real * ct, real *g, real *c__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; static char ichs[2] = "LR"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002) .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb, ics; real als; integer ict, icu; extern logical lse_(real *, real *, integer *); real err; char diag[1]; integer ldas, ldbs; logical same; char side[1]; logical left, null; char uplo[1]; real alpha; char diags[1]; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen); char sides[1]; extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char uplos[1]; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * , ftnlen, ftnlen, ftnlen, ftnlen), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char tranas[1], transa[1]; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___221 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___222 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___223 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___226 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___228 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___229 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___230 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___231 = { 0, 0, 0, fmt_9995, 0 }; /* Tests STRMM and STRSM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --g; --ct; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 11; nc = 0; reset = TRUE_; errmax = 0.f; /* Set up zero matrix for SMMCH. */ i__1 = *nmax; for (j = 1; j <= i__1; ++j) { i__2 = *nmax; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] = 0.f; /* L10: */ } /* L20: */ } i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L130; } lbb = ldb * n; null = m <= 0 || n <= 0; for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L130; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)transa = *(unsigned char *)&icht[ ict - 1]; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[ icd - 1]; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; /* Generate the matrix A. */ smake_("TR", uplo, diag, &na, &na, &a[ a_offset], nmax, &aa[1], &lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); /* Generate the matrix B. */ smake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *) side; *(unsigned char *)uplos = *(unsigned char *) uplo; *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)diags = *(unsigned char *) diag; ms = m; ns = n; als = alpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L30: */ } ldas = lda; i__4 = lbb; for (i__ = 1; i__ <= i__4; ++i__) { bs[i__] = bb[i__]; /* L40: */ } ldbs = ldb; /* Call the subroutine. */ if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen) 2) == 0) { if (*trace) { io___221.ciunit = *ntra; s_wsfe(&io___221); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } strmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { if (*trace) { io___222.ciunit = *ntra; s_wsfe(&io___222); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } strsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___223.ciunit = *nout; s_wsfe(&io___223); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *( unsigned char *)side; isame[1] = *(unsigned char *)uplos == *( unsigned char *)uplo; isame[2] = *(unsigned char *)tranas == *( unsigned char *)transa; isame[3] = *(unsigned char *)diags == *( unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; isame[6] = als == alpha; isame[7] = lse_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; if (null) { isame[9] = lse_(&bs[1], &bb[1], &lbb); } else { isame[9] = lseres_("GE", " ", &m, &n, &bs[ 1], &bb[1], &ldb, (ftnlen)2, ( ftnlen)1); } isame[10] = ldbs == ldb; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___226.ciunit = *nout; s_wsfe(&io___226); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L50: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { if (s_cmp(sname + 3, "MM", (ftnlen)2, ( ftnlen)2) == 0) { /* Check the result. */ if (left) { smmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & c_b84, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { smmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & c_b84, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } } else if (s_cmp(sname + 3, "SM", (ftnlen) 2, (ftnlen)2) == 0) { /* Compute approximation to original */ /* matrix. */ i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { c__[i__ + j * c_dim1] = bb[i__ + (j - 1) * ldb]; bb[i__ + (j - 1) * ldb] = alpha * b[i__ + j * b_dim1]; /* L60: */ } /* L70: */ } if (left) { smmch_(transa, "N", &m, &n, &m, & c_b94, &a[a_offset], nmax, &c__[c_offset], nmax, & c_b84, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { smmch_("N", transa, &m, &n, &n, & c_b94, &c__[c_offset], nmax, &a[a_offset], nmax, &c_b84, &b[b_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_false, ( ftnlen)1, (ftnlen)1); } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L150; } } /* L80: */ } /* L90: */ } /* L100: */ } /* L110: */ } /* L120: */ } L130: ; } /* L140: */ } /* Report result. */ if (errmax < *thresh) { io___228.ciunit = *nout; s_wsfe(&io___228); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___229.ciunit = *nout; s_wsfe(&io___229); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L160; L150: io___230.ciunit = *nout; s_wsfe(&io___230); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___231.ciunit = *nout; s_wsfe(&io___231); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of SCHK3. */ } /* schk3_ */ /* Subroutine */ int schk4_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char icht[3] = "NTC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1," "\002, C,\002,i3,\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lda, lcc, ldc; real als; integer ict, icu; extern logical lse_(real *, real *, integer *); real err, beta; integer ldas, ldcs; logical same; real bets; logical tran, null; char uplo[1]; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer * , real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___268 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___269 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___272 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___278 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___279 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___280 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___281 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___282 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYRK. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 10; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char *) trans == 'C'; if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ smake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], & lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ smake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; bets = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L20: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___268.ciunit = *ntra; s_wsfe(&io___268); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &beta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1) ; /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___269.ciunit = *nout; s_wsfe(&io___269); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als == alpha; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = bets == beta; if (null) { isame[8] = lse_(&cs[1], &cc[1], &lcc); } else { isame[8] = lseres_("SY", uplo, &n, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[9] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___272.ciunit = *nout; s_wsfe(&io___272); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { smmch_("T", "N", &lj, &c__1, &k, & alpha, &a[jj * a_dim1 + 1], nmax, &a[j * a_dim1 + 1], nmax, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { smmch_("N", "T", &lj, &c__1, &k, & alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } /* L40: */ } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___278.ciunit = *nout; s_wsfe(&io___278); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___279.ciunit = *nout; s_wsfe(&io___279); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: if (n > 1) { io___280.ciunit = *nout; s_wsfe(&io___280); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L120: io___281.ciunit = *nout; s_wsfe(&io___281); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___282.ciunit = *nout; s_wsfe(&io___282); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of SCHK4. */ } /* schk4_ */ /* Subroutine */ int schk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *ab, real *aa, real *as, real * bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, real * w, ftnlen sname_len) { /* Initialized data */ static char icht[3] = "NTC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; real als; integer ict, icu; extern logical lse_(real *, real *, integer *); real err; integer jjab; real beta; integer ldas, ldbs, ldcs; logical same; real bets; logical tran, null; char uplo[1]; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer * , real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int ssyr2k_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___322 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___323 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___326 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___333 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___334 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___337 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ nargs = 12; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; null = n <= 0; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char *) trans == 'C'; if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ if (tran) { i__3 = *nmax << 1; smake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { smake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } /* Generate the matrix B. */ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; smake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { smake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b84, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ smake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bets = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___322.ciunit = *ntra; s_wsfe(&io___322); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &bb[1], &ldb, &beta, &cc[1], &ldc, ( ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___323.ciunit = *nout; s_wsfe(&io___323); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als == alpha; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lse_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bets == beta; if (null) { isame[10] = lse_(&cs[1], &cc[1], &lcc); } else { isame[10] = lseres_("SY", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___326.ciunit = *nout; s_wsfe(&io___326); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. */ jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { w[i__] = ab[(j - 1 << 1) * *nmax + k + i__]; w[k + i__] = ab[(j - 1 << 1) * * nmax + i__]; /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; smmch_("T", "N", &lj, &c__1, &i__6, & alpha, &ab[jjab], &i__7, &w[1] , &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { w[i__] = ab[(k + i__ - 1) * *nmax + j]; w[k + i__] = ab[(i__ - 1) * *nmax + j]; /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; smmch_("N", "N", &lj, &c__1, &i__6, & alpha, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ if (errmax < *thresh) { io___333.ciunit = *nout; s_wsfe(&io___333); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___334.ciunit = *nout; s_wsfe(&io___334); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L160; L140: if (n > 1) { io___335.ciunit = *nout; s_wsfe(&io___335); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: io___336.ciunit = *nout; s_wsfe(&io___336); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___337.ciunit = *nout; s_wsfe(&io___337); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of SCHK5. */ } /* schk5_ */ /* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ real a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), ssyr2k_( char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___343 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___344 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA and BETA (eca) */ /* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA and BETA. */ alpha = 1.f; beta = 2.f; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; } L10: infoc_1.infot = 1; sgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; sgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L20: infoc_1.infot = 1; ssymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L30: infoc_1.infot = 1; strmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; strmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; strmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; strmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L40: infoc_1.infot = 1; strsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; strsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; strsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; strsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L50: infoc_1.infot = 1; ssyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssyrk_("U", "/", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L60: infoc_1.infot = 1; ssyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssyr2k_("U", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L70: if (infoc_1.ok) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___344.ciunit = *nout; s_wsfe(&io___344); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of SCHKE. */ } /* schke_ */ /* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, integer *n, real *a, integer *nmax, real *aa, integer *lda, logical * reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j; logical gen, tri, sym; integer ibeg, iend; extern real sbeg_(logical *); logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { a[i__ + j * a_dim1] = sbeg_(reset) + *transl; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { a[i__ + j * a_dim1] = 0.f; } if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.f; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.f; } if (unit) { a[j + j * a_dim1] = 1.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L80: */ } /* L90: */ } } return 0; /* End of SMAKE. */ } /* smake_ */ /* Subroutine */ int smmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, real *alpha, real *a, integer *lda, real *b, integer * ldb, real *beta, real *c__, integer *ldc, real *ct, real *g, real *cc, integer *ldcc, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; real erri; logical trana, tranb; /* Fortran I/O blocks */ static cilist io___361 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___362 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___363 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___364 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { ct[i__] = 0.f; g[i__] = 0.f; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[k + j * b_dim1]; g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = b[k + j * b_dim1], abs(r__2)); /* L20: */ } /* L30: */ } } else if (trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[k + j * b_dim1]; g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 = b[k + j * b_dim1], abs(r__2)); /* L40: */ } /* L50: */ } } else if (! trana && tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[j + k * b_dim1]; g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = b[j + k * b_dim1], abs(r__2)); /* L60: */ } /* L70: */ } } else if (trana && tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[j + k * b_dim1]; g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 = b[j + k * b_dim1], abs(r__2)); /* L80: */ } /* L90: */ } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { ct[i__] = *alpha * ct[i__] + *beta * c__[i__ + j * c_dim1]; g[i__] = abs(*alpha) * g[i__] + abs(*beta) * (r__1 = c__[i__ + j * c_dim1], abs(r__1)); /* L100: */ } /* Compute the error ratio for this result. */ *err = 0.f; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { erri = (r__1 = ct[i__] - cc[i__ + j * cc_dim1], abs(r__1)) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L130; } /* L110: */ } /* L120: */ } /* If the loop completes, all results are at least half accurate. */ goto L150; /* Report fatal error. */ L130: *fatal = TRUE_; io___361.ciunit = *nout; s_wsfe(&io___361); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___362.ciunit = *nout; s_wsfe(&io___362); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); e_wsfe(); } else { io___363.ciunit = *nout; s_wsfe(&io___363); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L140: */ } if (*n > 1) { io___364.ciunit = *nout; s_wsfe(&io___364); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: return 0; /* End of SMMCH. */ } /* smmch_ */ logical lse_(real *ri, real *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LSE. */ } /* lse_ */ logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, real *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LSERES. */ } /* lseres_ */ real sbeg_(logical *reset) { /* System generated locals */ real ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. */ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (i__ - 500) / 1001.f; return ret_val; /* End of SBEG. */ } /* sbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. */ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___374 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___374.ciunit = *nout; s_wsfe(&io___374); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___375.ciunit = infoc_2.nout; s_wsfe(&io___375); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___376.ciunit = infoc_2.nout; s_wsfe(&io___376); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___377.ciunit = infoc_2.nout; s_wsfe(&io___377); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int sblat3_ () { main (); return 0; } blis-1.1/blastest/src/zblat1.c000066400000000000000000000701411474157777200163050ustar00rootroot00000000000000/* zblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy, mode; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static integer c__5 = 5; static doublereal c_b43 = 1.; static doublereal c_b52 = 0.; /* > \brief \b ZBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM ZBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX*16 Level 1 BLAS. */ /* > */ /* > Based upon the original BLAS test routine together with: */ /* > F06GAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex16_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "zblat1"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static doublereal sfac = 9.765625e-4; /* Format strings */ static char fmt_99999[] = "(\002 Complex BLAS Test Program Results\002,/" "1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check1_(doublereal *), check2_(doublereal *), header_(void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 10; ++ic) { combla_1.icase = ic; header_(); /* Initialize PASS, INCX, INCY, and MODE for a new case. */ /* The value 9999 for INCX, INCY or MODE will appear in the */ /* detailed output, if any, for cases that do not involve */ /* these parameters. */ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; combla_1.mode = 9999; if (combla_1.icase <= 5) { check2_(&sfac); } else if (combla_1.icase >= 6) { check1_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*10] = "ZDOTC " "ZDOTU " "ZAXPY " "ZCOPY " "ZSWAP " "DZNR" "M2" "DZASUM" "ZSCAL " "ZDSCAL" "IZAMAX"; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check1_(doublereal *sfac) { /* Initialized data */ static doublereal strue2[5] = { 0.,.5,.6,.7,.8 }; static doublereal strue4[5] = { 0.,.7,1.,1.3,1.6 }; static doublecomplex ctrue5[80] /* was [8][5][2] */ = { {.1,.1},{1., 2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{-.16,-.37},{ 3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{-.17,-.19} ,{.13,-.39},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{.11, -.03},{-.17,.46},{-.17,-.19},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{7., 8.},{.19,-.17},{.2,-.35},{.35,.2},{.14,.08},{2.,3.},{2.,3.},{2., 3.},{2.,3.},{.1,.1},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4., 5.},{4.,5.},{-.16,-.37},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{ 6.,7.},{6.,7.},{-.17,-.19},{8.,9.},{.13,-.39},{2.,5.},{2.,5.},{2., 5.},{2.,5.},{2.,5.},{.11,-.03},{3.,6.},{-.17,.46},{4.,7.},{-.17, -.19},{7.,2.},{7.,2.},{7.,2.},{.19,-.17},{5.,8.},{.2,-.35},{6.,9.} ,{.35,.2},{8.,3.},{.14,.08},{9.,4.} }; static doublecomplex ctrue6[80] /* was [8][5][2] */ = { {.1,.1},{1., 2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{.09,-.12},{ 3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{.03,-.09}, {.15,-.03},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{.03, .03},{-.18,.03},{.03,-.09},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{7.,8.} ,{.09,.03},{.15,0.},{0.,.15},{0.,.06},{2.,3.},{2.,3.},{2.,3.},{2., 3.},{.1,.1},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4., 5.},{.09,-.12},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{ 6.,7.},{.03,-.09},{8.,9.},{.15,-.03},{2.,5.},{2.,5.},{2.,5.},{2., 5.},{2.,5.},{.03,.03},{3.,6.},{-.18,.03},{4.,7.},{.03,-.09},{7., 2.},{7.,2.},{7.,2.},{.09,.03},{5.,8.},{.15,0.},{6.,9.},{0.,.15},{ 8.,3.},{0.,.06},{9.,4.} }; static integer itrue3[5] = { 0,1,2,2,2 }; static doublereal sa = .3; static doublecomplex ca = {.4,-.7}; static doublecomplex cv[80] /* was [8][5][2] */ = { {.1,.1},{1.,2.},{1., 2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{.3,-.4},{3.,4.},{3., 4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{.1,-.3},{.5,-.1},{5., 6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{.1,.1},{-.6,.1},{.1, -.3},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{.3,.1},{.5,0.},{0., .5},{0.,.2},{2.,3.},{2.,3.},{2.,3.},{2.,3.},{.1,.1},{4.,5.},{4., 5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{.3,-.4},{6.,7.},{6., 7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{.1,-.3},{8.,9.},{.5, -.1},{2.,5.},{2.,5.},{2.,5.},{2.,5.},{2.,5.},{.1,.1},{3.,6.},{-.6, .1},{4.,7.},{.1,-.3},{7.,2.},{7.,2.},{7.,2.},{.3,.1},{5.,8.},{.5, 0.},{6.,9.},{0.,.5},{8.,3.},{0.,.2},{9.,4.} }; /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1; doublecomplex z__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; doublecomplex cx[8]; integer np1, len; extern /* Subroutine */ int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), ctest_(integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *); doublecomplex mwpcs[5], mwpct[5]; extern /* Subroutine */ int itest1_(integer *, integer *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *); extern /* Subroutine */ int stest1_(doublereal *, doublereal *, doublereal *, doublereal *), zdscal_(integer *, doublereal *, doublecomplex *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); extern doublereal dzasum_(integer *, doublecomplex *, integer *); /* Fortran I/O blocks */ static cilist io___19 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ - 1; i__3 = i__ + (np1 + combla_1.incx * 5 << 3) - 49; cx[i__2].r = cv[i__3].r, cx[i__2].i = cv[i__3].i; /* L20: */ } if (combla_1.icase == 6) { /* .. DZNRM2 .. */ d__1 = dznrm2_(&combla_1.n, cx, &combla_1.incx); stest1_(&d__1, &strue2[np1 - 1], &strue2[np1 - 1], sfac); } else if (combla_1.icase == 7) { /* .. DZASUM .. */ d__1 = dzasum_(&combla_1.n, cx, &combla_1.incx); stest1_(&d__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); } else if (combla_1.icase == 8) { /* .. ZSCAL .. */ zscal_(&combla_1.n, &ca, cx, &combla_1.incx); ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 9) { /* .. ZDSCAL .. */ zdscal_(&combla_1.n, &sa, cx, &combla_1.incx); ctest_(&len, cx, &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 10) { /* .. IZAMAX .. */ i__1 = izamax_(&combla_1.n, cx, &combla_1.incx); itest1_(&i__1, &itrue3[np1 - 1]); } else { s_wsle(&io___19); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } combla_1.incx = 1; if (combla_1.icase == 8) { /* ZSCAL */ /* Add a test for alpha equal to zero. */ ca.r = 0., ca.i = 0.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0., mwpct[i__1].i = 0.; i__1 = i__ - 1; mwpcs[i__1].r = 1., mwpcs[i__1].i = 1.; /* L80: */ } zscal_(&c__5, &ca, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } else if (combla_1.icase == 9) { /* ZDSCAL */ /* Add a test for alpha equal to zero. */ sa = 0.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0., mwpct[i__1].i = 0.; i__1 = i__ - 1; mwpcs[i__1].r = 1., mwpcs[i__1].i = 1.; /* L100: */ } zdscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to one. */ sa = 1.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; mwpct[i__1].r = cx[i__2].r, mwpct[i__1].i = cx[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; mwpcs[i__1].r = cx[i__2].r, mwpcs[i__1].i = cx[i__2].i; /* L120: */ } zdscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to minus one. */ sa = -1.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; z__1.r = -cx[i__2].r, z__1.i = -cx[i__2].i; mwpct[i__1].r = z__1.r, mwpct[i__1].i = z__1.i; i__1 = i__ - 1; i__2 = i__ - 1; z__1.r = -cx[i__2].r, z__1.i = -cx[i__2].i; mwpcs[i__1].r = z__1.r, mwpcs[i__1].i = z__1.i; /* L140: */ } zdscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } return 0; } /* check1_ */ /* Subroutine */ int check2_(doublereal *sfac) { /* Initialized data */ static doublecomplex ca = {.4,-.7}; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static doublecomplex cx1[7] = { {.7,-.8},{-.4,-.7},{-.1,-.9},{.2,-.8},{ -.9,-.4},{.1,.4},{-.6,.6} }; static doublecomplex cy1[7] = { {.6,-.6},{-.9,.5},{.7,-.6},{.1,-.5},{-.1, -.2},{-.5,-.3},{.8,-.7} }; static doublecomplex ct8[112] /* was [7][4][4] */ = { {.6,-.6},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{-1.55,.5},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{-1.55,.5},{.03, -.89},{-.38,-.96},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.} ,{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{-.07,-.89},{-.9,.5},{.42,-1.41},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{.78,.06},{-.9,.5},{.06,-.13},{.1,-.5} ,{-.77,-.49},{-.5,-.3},{.52,-1.51},{.6,-.6},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{-.07,-.89},{-1.18,-.31},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{.78,.06},{-1.54,.97},{.03,-.89},{-.18, -1.31},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{0.,0.},{0.,0.},{0.,0.} ,{0.,0.},{0.,0.},{.32,-1.41},{-.9,.5},{.05,-.6},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{.32,-1.41},{-.9,.5},{.05,-.6},{.1,-.5},{-.77,-.49} ,{-.5,-.3},{.32,-1.16} }; static doublecomplex ct7[16] /* was [4][4] */ = { {0.,0.},{-.06, -.9},{.65,-.47},{-.34,-1.22},{0.,0.},{-.06,-.9},{-.59,-1.46},{ -1.04,-.04},{0.,0.},{-.06,-.9},{-.83,.59},{.07,-.37},{0.,0.},{ -.06,-.9},{-.76,-1.15},{-1.33,-1.82} }; static doublecomplex ct6[16] /* was [4][4] */ = { {0.,0.},{.9,.06}, {.91,-.77},{1.8,-.1},{0.,0.},{.9,.06},{1.45,.74},{.2,.9},{0.,0.},{ .9,.06},{-.55,.23},{.83,-.39},{0.,0.},{.9,.06},{1.04,.79},{1.95, 1.22} }; static doublecomplex ct10x[112] /* was [7][4][4] */ = { {.7,-.8},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{-.9,.5},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{-.9,.5},{.7,-.6},{.1,-.5},{ 0.,0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{.7,-.6},{-.4,-.7},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.} ,{.8,-.7},{-.4,-.7},{-.1,-.2},{.2,-.8},{.7,-.6},{.1,.4},{.6,-.6},{ .7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{-.9,.5},{-.4,-.7}, {.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.1,-.5},{-.4,-.7},{.7, -.6},{.2,-.8},{-.9,.5},{.1,.4},{.6,-.6},{.7,-.8},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{.6,-.6},{.7,-.6},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{.6,-.6},{.7,-.6},{-.1,-.2},{.8,-.7},{0.,0.},{0., 0.},{0.,0.} }; static doublecomplex ct10y[112] /* was [7][4][4] */ = { {.6,-.6},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.4,-.7},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.4,-.7},{-.1,-.9},{.2, -.8},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{-.1,-.9},{-.9,.5},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{-.6,.6},{-.9,.5},{-.9,-.4},{.1,-.5},{-.1,-.9},{-.5,-.3},{ .7,-.8},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{ .7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{-.1,-.9}, {.7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{-.6,.6},{-.9, -.4},{-.1,-.9},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.9,.5},{-.4,-.7},{0.,0.} ,{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.9,.5},{-.4,-.7},{.1,-.5},{ -.1,-.9},{-.5,-.3},{.2,-.8} }; static doublecomplex csize1[4] = { {0.,0.},{.9,.9},{1.63,1.73},{2.9,2.78} }; static doublecomplex csize3[14] = { {0.,0.},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{1.17,1.17},{1.17,1.17},{1.17,1.17},{1.17, 1.17},{1.17,1.17},{1.17,1.17},{1.17,1.17} }; static doublecomplex csize2[14] /* was [7][2] */ = { {0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{1.54,1.54},{1.54,1.54},{ 1.54,1.54},{1.54,1.54},{1.54,1.54},{1.54,1.54},{1.54,1.54} }; /* System generated locals */ integer i__1, i__2; doublecomplex z__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, ki, kn; doublecomplex cx[7], cy[7]; integer mx, my; doublecomplex cdot[1]; integer lenx, leny; extern /* Subroutine */ int ctest_(integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *); extern /* Double Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL void zdotc_(doublecomplex *, #else doublecomplex zdotc_( #endif integer *, doublecomplex *, integer *, doublecomplex *, integer *); integer ksize; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern /* Double Complex */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL void zdotu_(doublecomplex *, #else doublecomplex zdotu_( #endif integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); /* Fortran I/O blocks */ static cilist io___48 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; cx[i__1].r = cx1[i__2].r, cx[i__1].i = cx1[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; cy[i__1].r = cy1[i__2].r, cy[i__1].i = cy1[i__2].i; /* L20: */ } if (combla_1.icase == 1) { /* .. ZDOTC .. */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL zdotc_(&z__1, #else z__1 = zdotc_( #endif &combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = z__1.r, cdot[0].i = z__1.i; ctest_(&c__1, cdot, &ct6[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. ZDOTU .. */ #ifdef BLIS_ENABLE_COMPLEX_RETURN_INTEL zdotu_(&z__1, #else z__1 = zdotu_( #endif &combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = z__1.r, cdot[0].i = z__1.i; ctest_(&c__1, cdot, &ct7[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 3) { /* .. ZAXPY .. */ zaxpy_(&combla_1.n, &ca, cx, &combla_1.incx, cy, & combla_1.incy); ctest_(&leny, cy, &ct8[(kn + (ki << 2)) * 7 - 35], &csize2[ ksize * 7 - 7], sfac); } else if (combla_1.icase == 4) { /* .. ZCOPY .. */ zcopy_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else if (combla_1.icase == 5) { /* .. ZSWAP .. */ zswap_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&lenx, cx, &ct10x[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else { s_wsle(&io___48); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } return 0; } /* check2_ */ /* Subroutine */ int stest_(integer *len, doublereal *scomp, doublereal * strue, doublereal *ssize, doublereal *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE I " " \002,\002 COMP(I) TRU" "E(I) DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,i3,2d36.8,2d12.4)"; /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; doublereal sd; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___51 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___52 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___53 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((d__2 = *sfac * sd, abs(d__2)) <= (d__1 = ssize[i__], abs(d__1)) * d_epsilon_(&c_b52)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___51); e_wsfe(); s_wsfe(&io___52); e_wsfe(); L20: s_wsfe(&io___53); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, doublereal *ssize, doublereal *sfac) { doublereal scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ doublereal sdiff_(doublereal *sa, doublereal *sb) { /* System generated locals */ doublereal ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int ctest_(integer *len, doublecomplex *ccomp, doublecomplex *ctrue, doublecomplex *csize, doublereal *sfac) { /* System generated locals */ integer i__1, i__2; /* Builtin functions */ double d_imag(const doublecomplex *); /* Local variables */ integer i__; doublereal scomp[20], ssize[20], strue[20]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* **************************** CTEST ***************************** */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --csize; --ctrue; --ccomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; scomp[(i__ << 1) - 2] = ccomp[i__2].r; scomp[(i__ << 1) - 1] = d_imag(&ccomp[i__]); i__2 = i__; strue[(i__ << 1) - 2] = ctrue[i__2].r; strue[(i__ << 1) - 1] = d_imag(&ctrue[i__]); i__2 = i__; ssize[(i__ << 1) - 2] = csize[i__2].r; ssize[(i__ << 1) - 1] = d_imag(&csize[i__]); /* L20: */ } i__1 = *len << 1; stest_(&i__1, scomp, strue, ssize, sfac); return 0; } /* ctest_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE " " \002,\002 COMP TRU" "E DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___60 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___61 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___63 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___60); e_wsfe(); s_wsfe(&io___61); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___63); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int zblat1_ () { main (); return 0; } blis-1.1/blastest/src/zblat2.c000066400000000000000000005055051474157777200163150ustar00rootroot00000000000000/* zblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static doublereal c_b122 = 0.; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; /* > \brief \b ZBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM ZBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX*16 Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 17 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 35 lines: */ /* > 'zblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZGERC T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZGERU T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex16_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "zblat2"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*17] = "ZGEMV " "ZGBMV " "ZHEMV " "ZHBMV " "ZHPMV " "ZTRMV " "ZTBMV " "ZTPMV " "ZTRSV " "ZTBSV " "ZTPSV " "ZGERC " "ZGERU " "ZHER " "ZHPR " "ZHER2 " "ZHPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE COMPLEX*16 LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9988[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9985[] = "(\002 ERROR IN ZMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 ZMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublecomplex a[4225] /* was [65][65] */; doublereal g[65]; integer i__, j, n; doublecomplex x[65], y[65], z__[130], aa[4225]; integer kb[7]; doublecomplex as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7] ; integer inc[7], nkb; doublecomplex bet[7]; doublereal eps, err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk5_( char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk6_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen); char snaps[32], trans[1]; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer isnum; logical ltest[17], sfatal; char snamet[6]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 17; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 17; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b122); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of ZMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); a[i__3].r = (doublereal) i__4, a[i__3].i = 0.; /* L110: */ } i__2 = j - 1; x[i__2].r = (doublereal) j, x[i__2].i = 0.; i__2 = j - 1; y[i__2].r = 0., y[i__2].i = 0.; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; yy[i__2].r = (doublereal) i__3, yy[i__2].i = 0.; /* L130: */ } /* YY holds the exact result. On exit from ZMVCH YT holds */ /* the result computed by ZMVCH. */ *(unsigned char *)trans = 'N'; zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lze_(yy, yt, &n); if (! same || err != 0.) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lze_(yy, yt, &n); if (! same || err != 0.) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 17; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { zchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L170; case 14: goto L180; case 15: goto L180; case 16: goto L190; case 17: goto L190; } /* Test ZGEMV, 01, and ZGBMV, 02. */ L140: zchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test ZHEMV, 03, ZHBMV, 04, and ZHPMV, 05. */ L150: zchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test ZTRMV, 06, ZTBMV, 07, ZTPMV, 08, */ /* ZTRSV, 09, ZTBSV, 10, and ZTPSV, 11. */ L160: zchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test ZGERC, 12, ZGERU, 13. */ L170: zchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test ZHER, 14, and ZHPR, 15. */ L180: zchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test ZHER2, 16, and ZHPR2, 17. */ L190: zchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of ZBLAT2. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns, laa, lda; doublecomplex als, bls; doublereal err; integer iku, kls; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer kus; doublecomplex beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; extern /* Subroutine */ int zgbmv_(char *, integer *, integer *, integer * , integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); char trans[1]; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical banded; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZGEMV and ZGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; zmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__4 = abs(incx); i__5 = nl - 1; zmake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { i__4 = nl / 2; x[i__4].r = 0., x[i__4].i = 0.; i__4 = abs(incx) * (nl / 2 - 1) + 1; xx[i__4].r = 0., xx[i__4].i = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7] .i; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__7 = abs(incy); i__8 = ml - 1; zmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lze_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lze_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lzeres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lze_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lze_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lze_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lzeres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. */ zmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L140; L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of ZCHK1. */ } /* zchk1_ */ /* Subroutine */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3,\002, X,\002," "i2,\002,(\002,f4.1,\002,\002,f4.1,\002), \002,\002Y,\002,i2,\002" ") .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), AP, X,\002,i2,\002,(" "\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2,\002) " ".\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; doublecomplex als, bls; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; extern /* Subroutine */ int zhbmv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen), zhemv_(char *, integer * , doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); char uplos[1]; extern /* Subroutine */ int zhpmv_(char *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); logical banded, packed; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZHEMV, ZHBMV and ZHPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__4 = abs(incx); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0., x[i__4].i = 0.; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0., xx[i__4].i = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7].i; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__7 = abs(incy); i__8 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[ i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[ i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[ i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhemv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhbmv_(uplo, &n, &k, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhpmv_(uplo, &n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lze_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[8] = lze_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lzeres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lze_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lze_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lzeres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&as[1], &aa[1], &laa); isame[4] = lze_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[7] = lze_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lzeres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ zmvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK2. */ } /* zchk2_ */ /* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *xt, doublereal *g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1], diags[1]; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs; char trans[1]; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); char uplos[1]; extern /* Subroutine */ int ztbmv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztbsv_(char *, char *, char *, integer * , integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpmv_(char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpsv_(char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, doublecomplex * , integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZTRMV, ZTBMV, ZTPMV, ZTRSV, ZTBSV and ZTPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero vector for ZMVCH. */ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; z__[i__2].r = 0., z__[i__2].i = 0.; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; zmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__4 = abs(incx); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0., x[i__4].i = 0.; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0., xx[i__4].i = 0.; } ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6] .i; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6] .i; /* L30: */ } incxs = incx; /* Call the subroutine. */ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lze_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lzeres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lze_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lzeres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lze_(&as[1], &aa[1], &laa); if (null) { isame[5] = lze_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lzeres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. */ zmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &x[1], &incx, &c_b1, &z__[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. */ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = (i__ - 1) * abs(incx) + 1; z__[i__5].r = xx[i__6].r, z__[i__5].i = xx[i__6].i; i__5 = (i__ - 1) * abs(incx) + 1; i__6 = i__; xx[i__5].r = x[i__6].r, xx[i__5].i = x[i__6].i; /* L50: */ } zmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &z__[1], &incx, &c_b1, &x[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK3. */ } /* zchk3_ */ /* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002," "\002),\002(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y," "\002,i2,\002, A,\002,i3,\002) \002,\002 " ".\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, m, n; doublecomplex w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; doublecomplex als; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer ldas; logical same, conj; integer incx, incy; logical null; doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); logical reset; integer incxs, incys; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen), zgeru_( integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___285 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___289 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___297 = { 0, 0, 0, fmt_9994, 0 }; /* Tests ZGERC and ZGERU. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ conj = *(unsigned char *)&sname[4] == 'C'; /* Define the number of arguments. */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__3 = abs(incx); i__4 = m - 1; zmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { i__3 = m / 2; x[i__3].r = 0., x[i__3].i = 0.; i__3 = abs(incx) * (m / 2 - 1) + 1; xx[i__3].r = 0., xx[i__3].i = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__4 = abs(incy); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0., y[i__4].i = 0.; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0., yy[i__4].i = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; i__5 = m - 1; i__6 = n - 1; zmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (*trace) { io___285.ciunit = *ntra; s_wsfe(&io___285); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (conj) { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgerc_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } else { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgeru_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___286.ciunit = *nout; s_wsfe(&io___286); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lze_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lze_(&as[1], &aa[1], &laa); } else { isame[7] = lzeres_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = m - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { i__6 = j; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } else { i__6 = n - j + 1; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } if (conj) { d_cnjg(&z__1, w); w[0].r = z__1.r, w[0].i = z__1.i; } zmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b2, &a[j * a_dim1 + 1], & c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L150; L130: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___297.ciunit = *nout; s_wsfe(&io___297); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of ZCHK4. */ } /* zchk4_ */ /* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, n; doublecomplex w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer ldas; logical same; doublereal rals; integer incx; logical full; extern /* Subroutine */ int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen); logical null; char uplo[1]; extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical upper; char uplos[1]; logical packed; doublereal ralpha, errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___326 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___327 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___328 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___331 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___342 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___343 = { 0, 0, 0, fmt_9994, 0 }; /* Tests ZHER and ZHPR. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__3 = abs(incx); i__4 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0., x[i__3].i = 0.; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0., xx[i__3].i = 0.; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; ralpha = alf[i__4].r; z__1.r = ralpha, z__1.i = 0.; alpha.r = z__1.r, alpha.i = z__1.i; null = n <= 0 || ralpha == 0.; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; i__4 = n - 1; i__5 = n - 1; zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; rals = ralpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6].i; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6].i; /* L20: */ } incxs = incx; /* Call the subroutine. */ if (full) { if (*trace) { io___326.ciunit = *ntra; s_wsfe(&io___326); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___327.ciunit = *ntra; s_wsfe(&io___327); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhpr_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___328.ciunit = *nout; s_wsfe(&io___328); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = rals == ralpha; isame[3] = lze_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lze_(&as[1], &aa[1], &laa); } else { isame[5] = lzeres_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___331.ciunit = *nout; s_wsfe(&io___331); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = n - i__ + 1; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { d_cnjg(&z__1, &z__[j]); w[0].r = z__1.r, w[0].i = z__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } zmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L110: io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___342.ciunit = *nout; s_wsfe(&io___342); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK5. */ } /* zchk5_ */ /* Subroutine */ int zchk6_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, A,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, AP) \002,\002 .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1, z__2, z__3; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, n; doublecomplex w[2]; integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda; doublecomplex als; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical upper; char uplos[1]; logical packed; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___380 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___391 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___392 = { 0, 0, 0, fmt_9994, 0 }; /* Tests ZHER2 and ZHPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__3 = abs(incx); i__4 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0., x[i__3].i = 0.; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0., xx[i__3].i = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__4 = abs(incy); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0., y[i__4].i = 0.; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0., yy[i__4].i = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; null = n <= 0 || alpha.r == 0. && alpha.i == 0.; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; i__5 = n - 1; i__6 = n - 1; zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___375.ciunit = *ntra; s_wsfe(&io___375); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zher2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___376.ciunit = *ntra; s_wsfe(&io___376); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhpr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___377.ciunit = *nout; s_wsfe(&io___377); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lze_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lze_(&as[1], &aa[1], &laa); } else { isame[7] = lzeres_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___380.ciunit = *nout; s_wsfe(&io___380); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = n - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = i__; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = n - i__ + 1; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L80: */ } } ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { d_cnjg(&z__2, &z__[j + (z_dim1 << 1)]); z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, z__1.i = alpha.r * z__2.i + alpha.i * z__2.r; w[0].r = z__1.r, w[0].i = z__1.i; d_cnjg(&z__2, &alpha); d_cnjg(&z__3, &z__[j + z_dim1]); z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; w[1].r = z__1.r, w[1].i = z__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } zmvch_("N", &lj, &c__2, &c_b2, &z__[jj + z_dim1], nmax, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], & aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. */ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. */ if (errmax < *thresh) { io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L170; L150: io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___391.ciunit = *nout; s_wsfe(&io___391); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___392.ciunit = *nout; s_wsfe(&io___392); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of ZCHK6. */ } /* zchk6_ */ /* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublecomplex a[1] /* was [1][1] */, x[1], y[1], beta; extern /* Subroutine */ int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, ftnlen), zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zgbmv_(char *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zhbmv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zhemv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zgeru_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), ztbmv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), zhpmv_(char *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), ztbsv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpmv_( char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpsv_(char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen); doublereal ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___399 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; case 17: goto L170; } L10: infoc_1.infot = 1; zgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; zgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; zgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L20: infoc_1.infot = 1; zgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L30: infoc_1.infot = 1; zhemv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhemv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zhemv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zhemv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L40: infoc_1.infot = 1; zhbmv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; zhbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zhbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; zhbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L50: infoc_1.infot = 1; zhpmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhpmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; zhpmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhpmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L60: infoc_1.infot = 1; ztrmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrmv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ztrmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L70: infoc_1.infot = 1; ztbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L80: infoc_1.infot = 1; ztpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L90: infoc_1.infot = 1; ztrsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ztrsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L100: infoc_1.infot = 1; ztbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L110: infoc_1.infot = 1; ztpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L120: infoc_1.infot = 1; zgerc_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgerc_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgerc_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zgerc_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zgerc_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L130: infoc_1.infot = 1; zgeru_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgeru_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgeru_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zgeru_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zgeru_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L140: infoc_1.infot = 1; zher_("/", &c__0, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zher_("U", &c_n1, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zher_("U", &c__0, &ralpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher_("U", &c__2, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L150: infoc_1.infot = 1; zhpr_("/", &c__0, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhpr_("U", &c_n1, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zhpr_("U", &c__0, &ralpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L160: infoc_1.infot = 1; zher2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zher2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zher2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L170: infoc_1.infot = 1; zhpr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhpr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zhpr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhpr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L180: if (infoc_1.ok) { io___399.ciunit = *nout; s_wsfe(&io___399); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___400.ciunit = *nout; s_wsfe(&io___400); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of ZCHKE. */ } /* zchke_ */ /* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, integer *lda, integer *kl, integer *ku, logical *reset, doublecomplex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; doublereal d__1; doublecomplex z__1, z__2; /* Builtin functions */ void d_cnjg(doublecomplex *, const doublecomplex *); integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, jj, kk; logical gen, tri, sym; integer ibeg, iend, ioff; extern /* Double Complex */ void zbeg_(doublecomplex *, logical *); logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'H'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { i__3 = i__ + j * a_dim1; zbeg_(&z__2, reset); z__1.r = z__2.r + transl->r, z__1.i = z__2.i + transl->i; a[i__3].r = z__1.r, a[i__3].i = z__1.i; } else { i__3 = i__ + j * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } if (i__ != j) { if (sym) { i__3 = j + i__ * a_dim1; d_cnjg(&z__1, &a[i__ + j * a_dim1]); a[i__3].r = z__1.r, a[i__3].i = z__1.i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } } } /* L10: */ } if (sym) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = a[i__3].r; z__1.r = d__1, z__1.i = 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; z__1.r = a[i__3].r + 1., z__1.i = a[i__3].i + 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1., a[i__2].i = 0.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { i__3 = i1 + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { i__3 = i2 + (j - 1) * *lda; i__4 = i2 + j - *ku - 1 + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { i__3 = i3 + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L120: */ } if (sym) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; d__1 = aa[i__3].r; z__1.r = d__1, z__1.i = -1e10; aa[i__2].r = z__1.r, aa[i__2].i = z__1.i; } /* L130: */ } } else if (s_cmp(type__, "HB", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j - kk + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L160: */ } if (sym) { jj = kk + (j - 1) * *lda; i__2 = jj; i__3 = jj; d__1 = aa[i__3].r; z__1.r = d__1, z__1.i = -1e10; aa[i__2].r = z__1.r, aa[i__2].i = z__1.i; } /* L170: */ } } else if (s_cmp(type__, "HP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; i__3 = ioff; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; if (i__ == j) { if (unit) { i__3 = ioff; aa[i__3].r = -1e10, aa[i__3].i = 1e10; } if (sym) { i__3 = ioff; i__4 = ioff; d__1 = aa[i__4].r; z__1.r = d__1, z__1.i = -1e10; aa[i__3].r = z__1.r, aa[i__3].i = z__1.i; } } /* L180: */ } /* L190: */ } } return 0; /* End of ZMAKE. */ } /* zmake_ */ /* Subroutine */ int zmvch_(char *trans, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *nmax, doublecomplex * x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * incy, doublecomplex *yt, doublereal *g, doublecomplex *yy, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3; /* Builtin functions */ double d_imag(const doublecomplex *); void d_cnjg(doublecomplex *, const doublecomplex *); double z_abs(const doublecomplex *), sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; doublereal erri; logical tran, ctran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___430 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___431 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___432 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. Statement Function definitions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; --yt; --g; --yy; /* Function Body */ tran = *(unsigned char *)trans == 'T'; ctran = *(unsigned char *)trans == 'C'; if (tran || ctran) { ml = *n; nl = *m; } else { ml = *m; nl = *n; } if (*incx < 0) { kx = nl; incxl = -1; } else { kx = 1; incxl = 1; } if (*incy < 0) { ky = ml; incyl = -1; } else { ky = 1; incyl = 1; } /* Compute expected result in YT using data in A, X and Y. */ /* Compute gauges in G. */ iy = ky; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; yt[i__2].r = 0., yt[i__2].i = 0.; g[iy] = 0.; jx = kx; if (tran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { i__3 = iy; i__4 = iy; i__5 = j + i__ * a_dim1; i__6 = jx; z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, z__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i; yt[i__3].r = z__1.r, yt[i__3].i = z__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4))); jx += incxl; /* L10: */ } } else if (ctran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { i__3 = iy; i__4 = iy; d_cnjg(&z__3, &a[j + i__ * a_dim1]); i__5 = jx; z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, z__2.i = z__3.r * x[i__5].i + z__3.i * x[i__5].r; z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i; yt[i__3].r = z__1.r, yt[i__3].i = z__1.i; i__3 = j + i__ * a_dim1; i__4 = jx; g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4))); jx += incxl; /* L20: */ } } else { i__2 = nl; for (j = 1; j <= i__2; ++j) { i__3 = iy; i__4 = iy; i__5 = i__ + j * a_dim1; i__6 = jx; z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i, z__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6] .r; z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i; yt[i__3].r = z__1.r, yt[i__3].i = z__1.i; i__3 = i__ + j * a_dim1; i__4 = jx; g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[ i__ + j * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r, abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4))); jx += incxl; /* L30: */ } } i__2 = iy; i__3 = iy; z__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, z__2.i = alpha->r * yt[i__3].i + alpha->i * yt[i__3].r; i__4 = iy; z__3.r = beta->r * y[i__4].r - beta->i * y[i__4].i, z__3.i = beta->r * y[i__4].i + beta->i * y[i__4].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; yt[i__2].r = z__1.r, yt[i__2].i = z__1.i; i__2 = iy; g[iy] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), abs( d__2))) * g[iy] + ((d__3 = beta->r, abs(d__3)) + (d__4 = d_imag(beta), abs(d__4))) * ((d__5 = y[i__2].r, abs(d__5)) + ( d__6 = d_imag(&y[iy]), abs(d__6))); iy += incyl; /* L40: */ } /* Compute the error ratio for this result. */ *err = 0.; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = (i__ - 1) * abs(*incy) + 1; z__1.r = yt[i__2].r - yy[i__3].r, z__1.i = yt[i__2].i - yy[i__3].i; erri = z_abs(&z__1) / *eps; if (g[i__] != 0.) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.) { goto L60; } /* L50: */ } /* If the loop completes, all results are at least half accurate. */ goto L80; /* Report fatal error. */ L60: *fatal = TRUE_; io___430.ciunit = *nout; s_wsfe(&io___430); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___431.ciunit = *nout; s_wsfe(&io___431); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); e_wsfe(); } else { io___432.ciunit = *nout; s_wsfe(&io___432); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L70: */ } L80: return 0; /* End of ZMVCH. */ } /* zmvch_ */ logical lze_(doublecomplex *ri, doublecomplex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LZE. */ } /* lze_ */ logical lzeres_(char *type__, char *uplo, integer *m, integer *n, doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'HE' or 'HP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LZERES. */ } /* lzeres_ */ /* Double Complex */ void zbeg_(doublecomplex * ret_val, logical *reset) { /* System generated locals */ doublereal d__1, d__2; doublecomplex z__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } d__1 = (i__ - 500) / 1001.; d__2 = (j - 500) / 1001.; z__1.r = d__1, z__1.i = d__2; ret_val->r = z__1.r, ret_val->i = z__1.i; return ; /* End of ZBEG. */ } /* zbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. */ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___444 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___444.ciunit = *nout; s_wsfe(&io___444); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___445 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___446 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___447 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___445.ciunit = infoc_2.nout; s_wsfe(&io___445); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___446.ciunit = infoc_2.nout; s_wsfe(&io___446); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___447.ciunit = infoc_2.nout; s_wsfe(&io___447); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int zblat2_ () { main (); return 0; } blis-1.1/blastest/src/zblat3.c000066400000000000000000006013321474157777200163110ustar00rootroot00000000000000/* zblat3.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static doublereal c_b88 = 0.; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; /* > \brief \b ZBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM ZBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX*16 Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 9 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 23 lines: */ /* > 'zblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHERK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex16_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { #ifdef BLIS_ENABLE_HPX char* program = "zblat3"; bli_thread_initialize_hpx( 1, &program ); #endif /* Initialized data */ static char snames[6*9] = "ZGEMM " "ZHEMM " "ZSYMM " "ZTRMM " "ZTRSM " "ZHERK " "ZSYRK " "ZHER2K" "ZSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE COMPLEX*16 LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9992[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9989[] = "(\002 ERROR IN ZMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 ZMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublecomplex c__[4225] /* was [65][65] */; doublereal g[65]; integer i__, j, n; doublecomplex w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[4225], as[4225], bs[4225], cs[4225], ct[65], alf[7], bet[7]; doublereal eps, err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk5_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen), zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); char snaps[32]; integer isnum; logical ltest[9], sfatal; char snamet[6], transa[1], transb[1]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Report values of parameters. */ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 9; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 9; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b88); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of ZMMCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); ab[i__3].r = (doublereal) i__4, ab[i__3].i = 0.; /* L90: */ } i__2 = j + 4224; ab[i__2].r = (doublereal) j, ab[i__2].i = 0.; i__2 = (j + 65) * 65 - 65; ab[i__2].r = (doublereal) j, ab[i__2].i = 0.; i__2 = j - 1; c__[i__2].r = 0., c__[i__2].i = 0.; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (doublereal) i__3, cc[i__2].i = 0.; /* L110: */ } /* CC holds the exact result. On exit from ZMMCH CT holds */ /* the result computed by ZMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j + 4224; i__3 = n - j + 1; ab[i__2].r = (doublereal) i__3, ab[i__2].i = 0.; i__2 = (j + 65) * 65 - 65; i__3 = n - j + 1; ab[i__2].r = (doublereal) i__3, ab[i__2].i = 0.; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n - j; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (doublereal) i__3, cc[i__2].i = 0.; /* L130: */ } *(unsigned char *)transa = 'C'; *(unsigned char *)transb = 'N'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 9; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { zchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L150; case 4: goto L160; case 5: goto L160; case 6: goto L170; case 7: goto L170; case 8: goto L180; case 9: goto L180; } /* Test ZGEMM, 01. */ L140: zchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZHEMM, 02, ZSYMM, 03. */ L150: zchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZTRMM, 04, ZTRSM, 05. */ L160: zchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test ZHERK, 06, ZSYRK, 07. */ L170: zchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZHER2K, 08, ZSYR2K, 09. */ L180: zchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; L190: if (fatal && sfatal) { goto L210; } } /* L200: */ } io___78.ciunit = nout; s_wsfe(&io___78); e_wsfe(); goto L230; L210: io___79.ciunit = nout; s_wsfe(&io___79); e_wsfe(); goto L230; L220: io___80.ciunit = nout; s_wsfe(&io___80); e_wsfe(); L230: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of ZBLAT3. */ #ifdef BLIS_ENABLE_HPX return bli_thread_finalize_hpx(); #else // Return peacefully. return 0; #endif } /* main */ /* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1," "\002), A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1" ",\002), C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; doublecomplex als, bls; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas, ldbs, ldcs; logical same, null; doublecomplex alpha; logical isame[13], trana, tranb; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen), zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); logical reset; char tranas[1], tranbs[1], transa[1], transb[1]; doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ zmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ zmake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b1, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { i__6 = ib; beta.r = bet[i__6].r, beta.i = bet[i__6].i; /* Generate the matrix C. */ zmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; as[i__7].r = aa[i__8].r, as[i__7].i = aa[ i__8].i; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; bs[i__7].r = bb[i__8].r, bs[i__7].i = bb[ i__8].i; /* L20: */ } ldbs = ldb; bls.r = beta.r, bls.i = beta.i; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; cs[i__7].r = cc[i__8].r, cs[i__7].i = cc[ i__8].i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lze_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lze_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lze_(&cs[1], &cc[1], &lcc); } else { isame[11] = lzeres_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ zmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result. */ if (errmax < *thresh) { io___130.ciunit = *nout; s_wsfe(&io___130); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___131.ciunit = *nout; s_wsfe(&io___131); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___132.ciunit = *nout; s_wsfe(&io___132); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___133.ciunit = *nout; s_wsfe(&io___133); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of ZCHK1. */ } /* zchk1_ */ /* Subroutine */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { /* Initialized data */ static char ichs[2] = "LR"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)" ", C,\002,i3,\002) .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc, ics; doublecomplex als, bls; integer icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas, ldbs, ldcs; logical same; char side[1]; logical conj, left, null; char uplo[1]; doublecomplex alpha; logical isame[13]; char sides[1]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); logical reset; char uplos[1]; extern /* Subroutine */ int zsymm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___172 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___173 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___176 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___178 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___179 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___180 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___181 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZHEMM and ZSYMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 12; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L90; } lcc = ldc * n; null = n <= 0 || m <= 0; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L90; } lbb = ldb * n; /* Generate the matrix B. */ zmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, & reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; /* Generate the hermitian or symmetric matrix A. */ zmake_(sname + 1, uplo, " ", &na, &na, &a[a_offset], nmax, &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; /* Generate the matrix C. */ zmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *)side; *(unsigned char *)uplos = *(unsigned char *)uplo; ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; bls.r = beta.r, bls.i = beta.i; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___172.ciunit = *ntra; s_wsfe(&io___172); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } if (conj) { zhemm_(side, uplo, &m, &n, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { zsymm_(side, uplo, &m, &n, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___173.ciunit = *nout; s_wsfe(&io___173); e_wsfe(); *fatal = TRUE_; goto L110; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *(unsigned char *)side; isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lze_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[10] = lze_(&cs[1], &cc[1], &lcc); } else { isame[10] = lzeres_("GE", " ", &m, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___176.ciunit = *nout; s_wsfe(&io___176); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L110; } if (! null) { /* Check the result. */ if (left) { zmmch_("N", "N", &m, &n, &m, &alpha, &a[ a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { zmmch_("N", "N", &m, &n, &n, &alpha, &b[ b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } L90: ; } /* L100: */ } /* Report result. */ if (errmax < *thresh) { io___178.ciunit = *nout; s_wsfe(&io___178); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___179.ciunit = *nout; s_wsfe(&io___179); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L120; L110: io___180.ciunit = *nout; s_wsfe(&io___180); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___181.ciunit = *nout; s_wsfe(&io___181); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L120: return 0; /* End of ZCHK2. */ } /* zchk2_ */ /* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *ct, doublereal *g, doublecomplex *c__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; static char ichs[2] = "LR"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb, ics; doublecomplex als; integer ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); char diag[1]; integer ldas, ldbs; logical same; char side[1]; logical left, null; char uplo[1]; doublecomplex alpha; char diags[1]; logical isame[13]; char sides[1]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical reset; char uplos[1]; extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char tranas[1], transa[1]; doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___222 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___223 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___224 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___227 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___229 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___230 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___231 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___232 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZTRMM and ZTRSM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --g; --ct; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 11; nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero matrix for ZMMCH. */ i__1 = *nmax; for (j = 1; j <= i__1; ++j) { i__2 = *nmax; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; c__[i__3].r = 0., c__[i__3].i = 0.; /* L10: */ } /* L20: */ } i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L130; } lbb = ldb * n; null = m <= 0 || n <= 0; for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L130; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)transa = *(unsigned char *)&icht[ ict - 1]; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[ icd - 1]; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; /* Generate the matrix A. */ zmake_("TR", uplo, diag, &na, &na, &a[ a_offset], nmax, &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); /* Generate the matrix B. */ zmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *) side; *(unsigned char *)uplos = *(unsigned char *) uplo; *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)diags = *(unsigned char *) diag; ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[ i__6].i; /* L30: */ } ldas = lda; i__4 = lbb; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; bs[i__5].r = bb[i__6].r, bs[i__5].i = bb[ i__6].i; /* L40: */ } ldbs = ldb; /* Call the subroutine. */ if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen) 2) == 0) { if (*trace) { io___222.ciunit = *ntra; s_wsfe(&io___222); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { if (*trace) { io___223.ciunit = *ntra; s_wsfe(&io___223); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___224.ciunit = *nout; s_wsfe(&io___224); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)sides == *( unsigned char *)side; isame[1] = *(unsigned char *)uplos == *( unsigned char *)uplo; isame[2] = *(unsigned char *)tranas == *( unsigned char *)transa; isame[3] = *(unsigned char *)diags == *( unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; isame[6] = als.r == alpha.r && als.i == alpha.i; isame[7] = lze_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; if (null) { isame[9] = lze_(&bs[1], &bb[1], &lbb); } else { isame[9] = lzeres_("GE", " ", &m, &n, &bs[ 1], &bb[1], &ldb, (ftnlen)2, ( ftnlen)1); } isame[10] = ldbs == ldb; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___227.ciunit = *nout; s_wsfe(&io___227); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L50: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { if (s_cmp(sname + 3, "MM", (ftnlen)2, ( ftnlen)2) == 0) { /* Check the result. */ if (left) { zmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { zmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } } else if (s_cmp(sname + 3, "SM", (ftnlen) 2, (ftnlen)2) == 0) { /* Compute approximation to original */ /* matrix. */ i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + j * c_dim1; i__7 = i__ + (j - 1) * ldb; c__[i__6].r = bb[i__7].r, c__[i__6].i = bb[i__7].i; i__6 = i__ + (j - 1) * ldb; i__7 = i__ + j * b_dim1; z__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, z__1.i = alpha.r * b[i__7].i + alpha.i * b[ i__7].r; bb[i__6].r = z__1.r, bb[i__6].i = z__1.i; /* L60: */ } /* L70: */ } if (left) { zmmch_(transa, "N", &m, &n, &m, & c_b2, &a[a_offset], nmax, &c__[c_offset], nmax, & c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { zmmch_("N", transa, &m, &n, &n, & c_b2, &c__[c_offset], nmax, &a[a_offset], nmax, &c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L150; } } /* L80: */ } /* L90: */ } /* L100: */ } /* L110: */ } /* L120: */ } L130: ; } /* L140: */ } /* Report result. */ if (errmax < *thresh) { io___229.ciunit = *nout; s_wsfe(&io___229); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___230.ciunit = *nout; s_wsfe(&io___230); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L160; L150: io___231.ciunit = *nout; s_wsfe(&io___231); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___232.ciunit = *nout; s_wsfe(&io___232); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of ZCHK3. */ } /* zchk3_ */ /* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { /* Initialized data */ static char icht[2] = "NC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1," "\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" " , A,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002), C,\002,i3," "\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lda, lcc, ldc; doublecomplex als; integer ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas, ldcs; logical same, conj; doublecomplex bets; doublereal rals; logical tran, null; char uplo[1]; doublecomplex alpha; doublereal rbeta; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); doublereal rbets; logical reset; extern /* Subroutine */ int zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int zsyrk_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal ralpha, errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___274 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___275 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___276 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___279 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___287 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___288 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___289 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___290 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___291 = { 0, 0, 0, fmt_9993, 0 }; /* Tests ZHERK and ZSYRK. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 10; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ zmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; if (conj) { ralpha = alpha.r; z__1.r = ralpha, z__1.i = 0.; alpha.r = z__1.r, alpha.i = z__1.i; } i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; if (conj) { rbeta = beta.r; z__1.r = rbeta, z__1.i = 0.; beta.r = z__1.r, beta.i = z__1.i; } null = n <= 0; if (conj) { null = null || (k <= 0 || ralpha == 0.) && rbeta == 1.; } /* Generate the matrix C. */ zmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; if (conj) { rals = ralpha; } else { als.r = alpha.r, als.i = alpha.i; } i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L20: */ } ldcs = ldc; /* Call the subroutine. */ if (conj) { if (*trace) { io___274.ciunit = *ntra; s_wsfe(&io___274); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zherk_(uplo, trans, &n, &k, &ralpha, &aa[1], & lda, &rbeta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___275.ciunit = *ntra; s_wsfe(&io___275); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zsyrk_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &beta, &cc[1], &ldc, (ftnlen)1, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___276.ciunit = *nout; s_wsfe(&io___276); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; if (conj) { isame[4] = rals == ralpha; } else { isame[4] = als.r == alpha.r && als.i == alpha.i; } isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (conj) { isame[7] = rbets == rbeta; } else { isame[7] = bets.r == beta.r && bets.i == beta.i; } if (null) { isame[8] = lze_(&cs[1], &cc[1], &lcc); } else { isame[8] = lzeres_(sname + 1, uplo, &n, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[9] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___279.ciunit = *nout; s_wsfe(&io___279); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { zmmch_(transt, "N", &lj, &c__1, &k, & alpha, &a[jj * a_dim1 + 1], nmax, &a[j * a_dim1 + 1], nmax, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { zmmch_("N", transt, &lj, &c__1, &k, & alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } /* L40: */ } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } /* L90: */ } L100: ; } /* Report result. */ if (errmax < *thresh) { io___286.ciunit = *nout; s_wsfe(&io___286); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___287.ciunit = *nout; s_wsfe(&io___287); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L110: if (n > 1) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L120: io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___290.ciunit = *nout; s_wsfe(&io___290); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___291.ciunit = *nout; s_wsfe(&io___291); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK4. */ } /* zchk4_ */ /* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * ab, doublecomplex *aa, doublecomplex *as, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal *g, doublecomplex *w, ftnlen sname_len) { /* Initialized data */ static char icht[2] = "NC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,\002,f4.1,\002, C,\002,i3,\002)" " .\002)"; static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)" ", C,\002,i3,\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; doublecomplex z__1, z__2; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; doublecomplex als; integer ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer jjab; doublecomplex beta; integer ldas, ldbs, ldcs; logical same, conj; doublecomplex bets; logical tran, null; char uplo[1]; doublecomplex alpha; doublereal rbeta; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); doublereal rbets; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen), zsyr2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___334 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___347 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___348 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___349 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___350 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___351 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___352 = { 0, 0, 0, fmt_9993, 0 }; /* Tests ZHER2K and ZSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 12; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ if (tran) { i__3 = *nmax << 1; zmake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } else { zmake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } /* Generate the matrix B. */ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; zmake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { zmake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; if (conj) { rbeta = beta.r; z__1.r = rbeta, z__1.i = 0.; beta.r = z__1.r, beta.i = z__1.i; } null = n <= 0; if (conj) { null = null || (k <= 0 || alpha.r == 0. && alpha.i == 0.) && rbeta == 1.; } /* Generate the matrix C. */ zmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (conj) { if (*trace) { io___334.ciunit = *ntra; s_wsfe(&io___334); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zher2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &rbeta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___335.ciunit = *ntra; s_wsfe(&io___335); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zsyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___336.ciunit = *nout; s_wsfe(&io___336); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lze_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; if (conj) { isame[9] = rbets == rbeta; } else { isame[9] = bets.r == beta.r && bets.i == beta.i; } if (null) { isame[10] = lze_(&cs[1], &cc[1], &lcc); } else { isame[10] = lzeres_("HE", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. */ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = (j - 1 << 1) * *nmax + k + i__; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8].i, z__1.i = alpha.r * ab[ i__8].i + alpha.i * ab[ i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; if (conj) { i__7 = k + i__; d_cnjg(&z__2, &alpha); i__8 = (j - 1 << 1) * *nmax + i__; z__1.r = z__2.r * ab[i__8].r - z__2.i * ab[i__8].i, z__1.i = z__2.r * ab[i__8].i + z__2.i * ab[ i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } else { i__7 = k + i__; i__8 = (j - 1 << 1) * *nmax + i__; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; zmmch_(transt, "N", &lj, &c__1, &i__6, &c_b2, &ab[jjab], &i__7, &w[ 1], &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1] , &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { if (conj) { i__7 = i__; d_cnjg(&z__2, &ab[(k + i__ - 1) * *nmax + j]); z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, z__1.i = alpha.r * z__2.i + alpha.i * z__2.r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; z__2.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__2.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; d_cnjg(&z__1, &z__2); w[i__7].r = z__1.r, w[i__7].i = z__1.i; } else { i__7 = i__; i__8 = (k + i__ - 1) * *nmax + j; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; zmmch_("N", "N", &lj, &c__1, &i__6, & c_b2, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ if (errmax < *thresh) { io___347.ciunit = *nout; s_wsfe(&io___347); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___348.ciunit = *nout; s_wsfe(&io___348); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L160; L140: if (n > 1) { io___349.ciunit = *nout; s_wsfe(&io___349); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: io___350.ciunit = *nout; s_wsfe(&io___350); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___351.ciunit = *nout; s_wsfe(&io___351); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___352.ciunit = *nout; s_wsfe(&io___352); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L160: return 0; /* End of ZCHK5. */ } /* zchk5_ */ /* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublecomplex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; doublereal rbeta; extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen), ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), zsymm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer * , doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), zsyrk_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen), zsyr2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___360 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___361 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) */ /* 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM */ /* with INFOT = 9 (eca) */ /* 10-9-00: Declared INTRINSIC DCMPLX (susan) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA, BETA, RALPHA, and RBETA. */ alpha.r = 1., alpha.i = -1.; beta.r = 2., beta.i = -2.; ralpha = 1.f; rbeta = 2.f; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; } L10: infoc_1.infot = 1; zgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; zgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; zgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L20: infoc_1.infot = 1; zhemm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhemm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L30: infoc_1.infot = 1; zsymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zsymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L40: infoc_1.infot = 1; ztrmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L50: infoc_1.infot = 1; ztrsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L60: infoc_1.infot = 1; zherk_("/", "N", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zherk_("U", "T", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("U", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("U", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("L", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("L", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("U", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("U", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("L", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("L", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("U", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("L", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("U", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("L", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L70: infoc_1.infot = 1; zsyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zsyrk_("U", "C", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L80: infoc_1.infot = 1; zher2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zher2k_("U", "T", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("U", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("L", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("U", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("L", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("U", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("L", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L90: infoc_1.infot = 1; zsyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zsyr2k_("U", "C", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L100: if (infoc_1.ok) { io___360.ciunit = *nout; s_wsfe(&io___360); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___361.ciunit = *nout; s_wsfe(&io___361); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of ZCHKE. */ } /* zchke_ */ /* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, integer *lda, logical *reset, doublecomplex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; doublereal d__1; doublecomplex z__1, z__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, jj; logical gen, her, tri, sym; integer ibeg, iend; extern /* Double Complex */ void zbeg_(doublecomplex *, logical *); logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'HE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; her = s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (her || sym || tri) && *(unsigned char *)uplo == 'U'; lower = (her || sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { i__3 = i__ + j * a_dim1; zbeg_(&z__2, reset); z__1.r = z__2.r + transl->r, z__1.i = z__2.i + transl->i; a[i__3].r = z__1.r, a[i__3].i = z__1.i; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { i__3 = i__ + j * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } if (her) { i__3 = j + i__ * a_dim1; d_cnjg(&z__1, &a[i__ + j * a_dim1]); a[i__3].r = z__1.r, a[i__3].i = z__1.i; } else if (sym) { i__3 = j + i__ * a_dim1; i__4 = i__ + j * a_dim1; a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } } } /* L10: */ } if (her) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = a[i__3].r; z__1.r = d__1, z__1.i = 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; z__1.r = a[i__3].r + 1., z__1.i = a[i__3].i + 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1., a[i__2].i = 0.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen) 2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L80: */ } if (her) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; d__1 = aa[i__3].r; z__1.r = d__1, z__1.i = -1e10; aa[i__2].r = z__1.r, aa[i__2].i = z__1.i; } /* L90: */ } } return 0; /* End of ZMAKE. */ } /* zmake_ */ /* Subroutine */ int zmmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * c__, integer *ldc, doublecomplex *ct, doublereal *g, doublecomplex * cc, integer *ldcc, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3, z__4; /* Builtin functions */ double d_imag(const doublecomplex *); void d_cnjg(doublecomplex *, const doublecomplex *); double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; doublereal erri; logical trana, tranb, ctrana, ctranb; /* Fortran I/O blocks */ static cilist io___382 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___383 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___384 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. Statement Function definitions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; ctrana = *(unsigned char *)transa == 'C'; ctranb = *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; ct[i__3].r = 0., ct[i__3].i = 0.; g[i__] = 0.; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = k + j * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[ i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag( &a[i__ + k * a_dim1]), abs(d__2))) * ((d__3 = b[ i__5].r, abs(d__3)) + (d__4 = d_imag(&b[k + j * b_dim1]), abs(d__4))); /* L20: */ } /* L30: */ } } else if (trana && ! tranb) { if (ctrana) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); i__6 = k + j * b_dim1; z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, z__2.i = z__3.r * b[i__6].i + z__3.i * b[i__6] .r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[k + j * b_dim1]), abs(d__4))); /* L40: */ } /* L50: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = k + j * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, z__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[k + j * b_dim1]), abs(d__4))); /* L60: */ } /* L70: */ } } } else if (! trana && tranb) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; d_cnjg(&z__3, &b[j + k * b_dim1]); z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, z__2.i = a[i__6].r * z__3.i + a[i__6].i * z__3.r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[j + k * b_dim1]), abs(d__4))); /* L80: */ } /* L90: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = j + k * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, z__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[j + k * b_dim1]), abs(d__4))); /* L100: */ } /* L110: */ } } } else if (trana && tranb) { if (ctrana) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); d_cnjg(&z__4, &b[j + k * b_dim1]); z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i + z__3.i * z__4.r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L120: */ } /* L130: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); i__6 = j + k * b_dim1; z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, z__2.i = z__3.r * b[i__6].i + z__3.i * b[ i__6].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L140: */ } /* L150: */ } } } else { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; d_cnjg(&z__3, &b[j + k * b_dim1]); z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, z__2.i = a[i__6].r * z__3.i + a[i__6].i * z__3.r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L160: */ } /* L170: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = j + k * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[ i__7].i, z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L180: */ } /* L190: */ } } } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; z__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, z__2.i = alpha->r * ct[i__4].i + alpha->i * ct[i__4].r; i__5 = i__ + j * c_dim1; z__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, z__3.i = beta->r * c__[i__5].i + beta->i * c__[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ct[i__3].r = z__1.r, ct[i__3].i = z__1.i; i__3 = i__ + j * c_dim1; g[i__] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), abs(d__2))) * g[i__] + ((d__3 = beta->r, abs(d__3)) + ( d__4 = d_imag(beta), abs(d__4))) * ((d__5 = c__[i__3].r, abs(d__5)) + (d__6 = d_imag(&c__[i__ + j * c_dim1]), abs( d__6))); /* L200: */ } /* Compute the error ratio for this result. */ *err = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__ + j * cc_dim1; z__2.r = ct[i__3].r - cc[i__4].r, z__2.i = ct[i__3].i - cc[i__4] .i; z__1.r = z__2.r, z__1.i = z__2.i; erri = ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), abs( d__2))) / *eps; if (g[i__] != 0.) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.) { goto L230; } /* L210: */ } /* L220: */ } /* If the loop completes, all results are at least half accurate. */ goto L250; /* Report fatal error. */ L230: *fatal = TRUE_; io___382.ciunit = *nout; s_wsfe(&io___382); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___383.ciunit = *nout; s_wsfe(&io___383); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); e_wsfe(); } else { io___384.ciunit = *nout; s_wsfe(&io___384); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L240: */ } if (*n > 1) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L250: return 0; /* End of ZMMCH. */ } /* zmmch_ */ logical lze_(doublecomplex *ri, doublecomplex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LZE. */ } /* lze_ */ logical lzeres_(char *type__, char *uplo, integer *m, integer *n, doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'HE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LZERES. */ } /* lzeres_ */ /* Double Complex */ void zbeg_(doublecomplex * ret_val, logical *reset) { /* System generated locals */ doublereal d__1, d__2; doublecomplex z__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } d__1 = (i__ - 500) / 1001.; d__2 = (j - 500) / 1001.; z__1.r = d__1, z__1.i = d__2; ret_val->r = z__1.r, ret_val->i = z__1.i; return ; /* End of ZBEG. */ } /* zbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. */ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___397 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___397.ciunit = *nout; s_wsfe(&io___397); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___398 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___399 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___398.ciunit = infoc_2.nout; s_wsfe(&io___398); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___399.ciunit = infoc_2.nout; s_wsfe(&io___399); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___400.ciunit = infoc_2.nout; s_wsfe(&io___400); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int zblat3_ () { main (); return 0; } blis-1.1/blis.pc.in000066400000000000000000000004071474157777200142140ustar00rootroot00000000000000prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: BLIS Description: BLAS-like Library Instantiation Software Framework Version: @PACKAGE_VERSION@ Libs: -L${libdir} -lblis Libs.private: @LDFLAGS@ Cflags: -I${includedir}/blis blis-1.1/build/000077500000000000000000000000001474157777200134305ustar00rootroot00000000000000blis-1.1/build/add-copyright.py000077500000000000000000000254001474157777200165440ustar00rootroot00000000000000#!/usr/bin/env python3 # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Import modules import os import sys import getopt import re import subprocess import datetime def print_usage(): my_print( " " ) my_print( " %s" % script_name ) my_print( " " ) my_print( " Field G. Van Zee" ) my_print( " " ) my_print( " Update copyright lines of all created or modified source files currently" ) my_print( " staged in the git index, and also insert new copyright lines where they" ) my_print( " currently are missing. This script targets copyright lines for one" ) my_print( " organization at a time." ) my_print( " " ) my_print( " Usage:" ) my_print( " " ) my_print( " %s [options]" % script_name ) my_print( " " ) my_print( " Arguments:" ) my_print( " " ) my_print( " " ) my_print( " The following options are accepted:" ) my_print( " " ) my_print( " -o org organization name" ) my_print( " Update and add copyrights for an organization named ." ) my_print( " By default, is 'Advanced Micro Devices, Inc.'" ) my_print( " " ) my_print( " -u update only" ) my_print( " Update existing copyrights to reflect the current year," ) my_print( " but do not add any additional copyright lines. With this" ) my_print( " option, the script still only updates copyright lines for" ) my_print( " the specified (or default) organization. The default is" ) my_print( " to update but also add copyright lines where missing." ) my_print( " " ) my_print( " -d dry run" ) my_print( " Go through all of the motions, but don't actually modify" ) my_print( " any files. The default behavior is to not enable dry run." ) my_print( " " ) my_print( " -q quiet" ) my_print( " Do not output feedback while processing each file. The" ) my_print( " default behavior is to output one line of text to stdout" ) my_print( " per file updated." ) my_print( " " ) my_print( " -h help" ) my_print( " Output this information and exit." ) my_print( " " ) # ------------------------------------------------------------------------------ def my_print( s ): sys.stdout.write( "%s\n" % s ) #sys.stdout.flush() def my_echo( s ): if not quiet: sys.stdout.write( "%s: %s\n" % ( output_name, s ) ) #sys.stdout.flush() # ------------------------------------------------------------------------------ def main(): global script_name global output_name global quiet # Obtain the script name. path, script_name = os.path.split(sys.argv[0]) output_name = script_name # Default values for optional arguments. the_org = 'Advanced Micro Devices, Inc.' update_only = False dry_run = False quiet = False # Process our command line options. try: opts, args = getopt.getopt( sys.argv[1:], "do:uhq" ) except getopt.GetoptError as err: # print help information and exit: my_print( str(err) ) # will print something like "option -a not recognized" print_usage() sys.exit(2) for opt, optarg in opts: if opt == "-o": the_org = optarg elif opt == "-u": update_only = True elif opt == "-d": dry_run = True elif opt == "-q": quiet = True elif opt == "-h": print_usage() sys.exit() else: print_usage() sys.exit() # Print usage if we don't have exactly zero arguments. if len( args ) != 0: print_usage() sys.exit() # Acquire our only mandatory argument. #driverfile = args[0] # Query the current year. the_time = datetime.datetime.now() cur_year = str(the_time.year) # We run 'git status' with --porcelain to make the output easily parseable. gitstatus = 'git status --porcelain' # Run the 'git status' command and capture the output. p = subprocess.run( gitstatus, stdout=subprocess.PIPE, shell=True ) git_lines = p.stdout.decode().splitlines() git_num_lines = int( len( git_lines ) ) # Consider each line of output from 'git status' for i in range( git_num_lines ): # Parse the current line to find the performance value. git_line = git_lines[i] git_words = git_line.split() mod_char = git_line[0] # Check the first character of the git output. We want to only update # files that are new ('A'), modified ('M'), or renamed ('R'). if mod_char != 'A' and \ mod_char != 'M' and \ mod_char != 'R': continue # Identify the filename for the current line of 'git status' output. if mod_char == 'R': # For renamed files, we need to reference them by their new names, # which appear after the "->" char sequence in git_words[2]. filename = git_words[3] else: filename = git_words[1] #my_echo( "-debug---- %s" % filename ) # Start by opening the file. (We can assume it exists since it # was found by 'git status', so no need to check for existence.) # Read all lines in the file and then close it. f = open( filename, "r" ) file_lines = f.readlines() f.close() # Concatenate all lines in the file into one string. file_string = "".join( file_lines ) # Search for an existing copyright line. has_cr = re.search( r'Copyright \(C\)', file_string ) # If the file does not have any copyright notice in it already, we # assume we don't need to update it. if not has_cr: my_echo( "[nocrline] %s" % filename ) continue # Check whether the file already has a copyright for the_org. We may # need to use this information later. has_org_cr = re.search( r'Copyright \(C\) ([0-9][0-9][0-9][0-9]), %s' % the_org, file_string ) # Initialize the list of processed (potentially modified) file lines. mod_file_lines = [] # At this point we know that the file has at least one copyright, and # has_org_cr encodes whether it already has a copyright for the_org. # We process the files that we know already have copyrights for the_org # differently from the files that do not yet have them. if has_org_cr: # Iterate through the lines in the current file. for line in file_lines: result = re.search( r'Copyright \(C\) ([0-9][0-9][0-9][0-9]), %s' % the_org, line ) # If the current line matches a copyright line for the_org... if result: # Extract the year saved as the first/only group in the # regular expression. old_year = result.group(1) # Don't need to update the year if it's already up-to-date. if old_year != cur_year: # Substitute the old year for the current year. find_line = ' %s, ' % old_year repl_line = ' %s, ' % cur_year line_ny = re.sub( find_line, repl_line, line ) my_echo( "[updated ] %s" % filename ) # Add the updated line to the running list. mod_file_lines += line_ny else: my_echo( "[up2date ] %s" % filename ) # Add the unchanged line to the running list. mod_file_lines += line else: # Add the unchanged line to the running list. mod_file_lines += line # endif result # endfor else: # Don't go any further if we're only updating existing copyright # lines. if update_only: my_echo( "[nocrline] %s" % filename ) continue num_file_lines = len( file_lines ) # Iterate through the lines in the current file. for i in range( int(num_file_lines) ): line = file_lines[i] # Only look at the next line if we are not at the last line. if i < int(num_file_lines) - 1: line_next = file_lines[i+1] else: line_next = file_lines[i] # Try to match both the current line and the next line. result = re.search( r'Copyright \(C\) ([0-9][0-9][0-9][0-9]), (.*)', line ) resnext = re.search( r'Copyright \(C\) ([0-9][0-9][0-9][0-9]), (.*)', line_next ) # Parse the results. if result: if resnext: # The current line matches but so does the next. Add the # current line unchanged to the running list. mod_file_lines += line else: # The current line matches but the next does not. Thus, # this branch only executes for the *last* copyright line # in the file. # Extract the year and organization from the matched # string. old_year = result.group(1) old_org = result.group(2) # Set up search/replace strings to convert the current # line into one that serves as copyright for the_org. find_line = '%s, %s' % (old_year, old_org) repl_line = '%s, %s' % (cur_year, the_org) line_nyno = re.sub( find_line, repl_line, line ) # Add the current line and then also insert our new # copyright line for the_org into the running list. mod_file_lines += line mod_file_lines += line_nyno my_echo( "[added ] %s" % filename ) # endif resnext else: # The current line does not match. Pass it through unchanged. mod_file_lines += line # endif result # endfor # endif has_org_cr if not dry_run: # Open the file for writing. f = open( filename, "w" ) # Join the modified file lines into a single string. final_string = "".join( mod_file_lines ) # Write the lines to the file. f.write( final_string ) # Close the file. f.close() # endif not dry_run # Return from main(). return 0 if __name__ == "__main__": main() blis-1.1/build/bli_addon.h.in000066400000000000000000000034751474157777200161320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2021, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ADDON_H #define BLIS_ADDON_H #if @enable_addons@ #define BLIS_ENABLE_ADDONS #else #define BLIS_DISABLE_ADDONS #endif // Enabled addons @addon_list_includes@ #endif blis-1.1/build/bli_config.h.in000066400000000000000000000114741474157777200163100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_CONFIG_H #define BLIS_CONFIG_H // Enabled configuration "family" (config_name) @config_name_define@ // Enabled sub-configurations (config_list) @config_list_defines@ // Enabled kernel sets (kernel_list) @kernel_list_defines@ #define BLIS_VERSION_STRING "@version@" #if @enable_system@ #define BLIS_ENABLE_SYSTEM #else #define BLIS_DISABLE_SYSTEM #endif #if @enable_tls@ #define BLIS_ENABLE_TLS #else #define BLIS_DISABLE_TLS #endif #if @enable_openmp@ #define BLIS_ENABLE_OPENMP #if @enable_openmp_as_def@ #define BLIS_ENABLE_OPENMP_AS_DEFAULT #endif #endif #if @enable_pthreads@ #define BLIS_ENABLE_PTHREADS #if @enable_pthreads_as_def@ #define BLIS_ENABLE_PTHREADS_AS_DEFAULT #endif #endif #if @enable_hpx@ #define BLIS_ENABLE_HPX #if @enable_hpx_as_def@ #define BLIS_ENABLE_HPX_AS_DEFAULT #endif #endif #if @enable_jrir_slab@ #define BLIS_ENABLE_JRIR_SLAB #endif #if @enable_jrir_rr@ #define BLIS_ENABLE_JRIR_RR #endif #if @enable_jrir_tlb@ #define BLIS_ENABLE_JRIR_TLB #endif #if @enable_pba_pools@ #define BLIS_ENABLE_PBA_POOLS #else #define BLIS_DISABLE_PBA_POOLS #endif #if @enable_sba_pools@ #define BLIS_ENABLE_SBA_POOLS #else #define BLIS_DISABLE_SBA_POOLS #endif #if @enable_mem_tracing@ #define BLIS_ENABLE_MEM_TRACING #else #define BLIS_DISABLE_MEM_TRACING #endif #if @enable_scalapack_compat@ #define BLIS_ENABLE_SCALAPACK_COMPAT #else #define BLIS_DISABLE_SCALAPACK_COMPAT #endif #if @int_type_size@ == 64 #define BLIS_INT_TYPE_SIZE 64 #elif @int_type_size@ == 32 #define BLIS_INT_TYPE_SIZE 32 #else // determine automatically #endif #if @blas_int_type_size@ == 64 #define BLIS_BLAS_INT_TYPE_SIZE 64 #elif @blas_int_type_size@ == 32 #define BLIS_BLAS_INT_TYPE_SIZE 32 #else // determine automatically #endif #ifndef BLIS_ENABLE_BLAS #ifndef BLIS_DISABLE_BLAS #if @enable_blas@ #define BLIS_ENABLE_BLAS #else #define BLIS_DISABLE_BLAS #endif #endif #endif #ifndef BLIS_ENABLE_CBLAS #ifndef BLIS_DISABLE_CBLAS #if @enable_cblas@ #define BLIS_ENABLE_CBLAS #else #define BLIS_DISABLE_CBLAS #endif #endif #endif #ifndef BLIS_ENABLE_MIXED_DT #ifndef BLIS_DISABLE_MIXED_DT #if @enable_mixed_dt@ #define BLIS_ENABLE_MIXED_DT #else #define BLIS_DISABLE_MIXED_DT #endif #endif #endif #ifndef BLIS_ENABLE_MIXED_DT_EXTRA_MEM #ifndef BLIS_DISABLE_MIXED_DT_EXTRA_MEM #if @enable_mixed_dt_extra_mem@ #define BLIS_ENABLE_MIXED_DT_EXTRA_MEM #else #define BLIS_DISABLE_MIXED_DT_EXTRA_MEM #endif #endif #endif #if @enable_sup_handling@ #define BLIS_ENABLE_SUP_HANDLING #else #define BLIS_DISABLE_SUP_HANDLING #endif #if @enable_memkind@ #define BLIS_ENABLE_MEMKIND #else #define BLIS_DISABLE_MEMKIND #endif #if @enable_trsm_preinversion@ #define BLIS_ENABLE_TRSM_PREINVERSION #else #define BLIS_DISABLE_TRSM_PREINVERSION #endif #if @enable_pragma_omp_simd@ #define BLIS_ENABLE_PRAGMA_OMP_SIMD #else #define BLIS_DISABLE_PRAGMA_OMP_SIMD #endif #if @enable_sandbox@ #define BLIS_ENABLE_SANDBOX #else #define BLIS_DISABLE_SANDBOX #endif #if @enable_shared@ #define BLIS_ENABLE_SHARED #else #define BLIS_DISABLE_SHARED #endif #if @complex_return_intel@ #define BLIS_ENABLE_COMPLEX_RETURN_INTEL #else #define BLIS_DISABLE_COMPLEX_RETURN_INTEL #endif #endif blis-1.1/build/blis.h000066400000000000000000000000271474157777200145310ustar00rootroot00000000000000#include blis-1.1/build/bump-version.sh000077500000000000000000000144411474157777200164210ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # bump-version.sh # # Field G. Van Zee # print_usage() { #local script_name # Get the script name #script_name=${0##*/} # Echo usage info echo " " echo " "$script_name echo " " echo " Field G. Van Zee" echo " " echo " Performs a series of actions needed when incrementing (bumping) the" echo " BLIS version number:" echo " 1. Overwrite the version file with the version string passed" echo " into this script (new_vers)." echo " 2. Commit the updated version file." echo " 3. Create a new tag (named the same as new_vers) which refers to" echo " the commit created in (2)." echo " 4. Update the CHANGELOG file." echo " 5. Commit the updated CHANGELOG file." echo " " echo " Usage:" echo " ${script_name} [options] new_vers" echo " " echo " Arguments:" echo " " echo " new_vers The new version string." echo " " echo " Options:" echo " " echo " -d dry-run" echo " Go through all the motions, but don't actually make any" echo " changes to files or perform any git commits. Note that" echo " this will result in the commits for (2) and (5) above" echo " being equal to the initial commit in the script output." echo " -f VERSFILE version file name" echo " Update VERSFILE with new version string instead of default" echo " 'version' file." # Exit with non-zero exit status exit 1 } main() { # -- BEGIN GLOBAL VARIABLE DECLARATIONS -- # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The name of the config.mk file. configmk_file='config.mk' # The name of the CHANGELOG file. changelog_file='CHANGELOG' # The name and location of the default version file. version_file_def='build/version' # The name and location of the specified version file. version_file='' # Strings used during version query. git_commit_str='' new_version_str='' # The script name to use instead of the $0 when outputting messages. output_name='' # The git directory. gitdir='.git' # Whether we are performing a dry run or not. dry_run_flag="" # -- END GLOBAL VARIABLE DECLARATIONS -- # Process our command line options. while getopts ":dhf:" opt; do case $opt in d ) dry_run_flag="1" ;; f ) version_file=$OPTARG ;; h ) print_usage ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # If a version file name was not given, set version_file to the default # value. if [ -n "${version_file}" ]; then echo "${script_name}: version file specified: '${version_file}'." else echo "${script_name}: no version file specified; defaulting to '${version_file_def}'." version_file="${version_file_def}" fi # Check the number of arguments after command line option processing. if [ $# = "1" ]; then new_version_str=$1 echo "${script_name}: preparing to bump to version '${new_version_str}'." else print_usage fi # Check if the .git dir exists; if it does not, we do nothing. if [ -d "${gitdir}" ]; then echo "${script_name}: found '${gitdir}' directory; assuming git clone." git_commit_str=$(git describe --always) echo "${script_name}: initial commit: ${git_commit_str}." echo "${script_name}: updating version file '${version_file}'." if [ -z "$dry_run_flag" ]; then echo "${new_version_str}" > ${version_file} fi echo "${script_name}: executing: git commit -m \"Version file update (${new_version_str})\" ${version_file}." if [ -z "$dry_run_flag" ]; then git commit -m "Version file update (${new_version_str})" ${version_file} fi git_commit_str=$(git describe --always) echo "${script_name}: commit to be tagged: ${git_commit_str}." echo "${script_name}: executing: git tag ${new_version_str} ${git_commit_str}." if [ -z "$dry_run_flag" ]; then git tag ${new_version_str} ${git_commit_str} fi echo "${script_name}: updating ${changelog_file}." if [ -z "$dry_run_flag" ]; then # If 'make distclean' was run recently, we need to re-run # configure in order for 'make changelog' to work properly. if [ ! -f "${configmk_file}" ]; then ./configure auto fi make changelog fi echo "${script_name}: executing: git commit -m \"CHANGELOG update (${new_version_str})\" ${changelog_file}." if [ -z "$dry_run_flag" ]; then git commit -m "CHANGELOG update (${new_version_str})" ${changelog_file} fi git_commit_str=$(git describe --always) echo "${script_name}: latest commit: ${git_commit_str}." else echo "${script_name}: could not find '${gitdir}' directory; bailing out." fi # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/cblas.h000066400000000000000000000000301474157777200146560ustar00rootroot00000000000000#include blis-1.1/build/config.mk.in000066400000000000000000000210401474157777200156300ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2022, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once ifndef CONFIG_MK_INCLUDED CONFIG_MK_INCLUDED := yes # The version string. This could be the official string or a custom # string forced at configure-time. VERSION := @version@ # The shared library .so major and minor.build version numbers. SO_MAJOR := @so_version_major@ SO_MINORB := @so_version_minorbuild@ SO_MMB := $(SO_MAJOR).$(SO_MINORB) # The name of the configuration family. CONFIG_NAME := @config_name@ # The list of sub-configurations associated with CONFIG_NAME. Each # sub-configuration in CONFIG_LIST corresponds to a configuration # sub-directory in the 'config' directory. See the 'config_registry' # file for the full list of registered configurations. CONFIG_LIST := @config_list@ # This list of kernels needed for the configurations in CONFIG_LIST. # Each item in this list corresponds to a sub-directory in the top-level # 'kernels' directory. Oftentimes, this list is identical to CONFIG_LIST, # but not always. For example, if configuration X and Y use the same # kernel set X, and configuration W uses kernel set Q, and the CONFIG_LIST # might contained "X Y Z W", then the KERNEL_LIST would contain "X Z Q". KERNEL_LIST := @kernel_list@ # This list contains some number of "kernel:config" pairs, where "config" # specifies which configuration's compilation flags (CFLAGS) should be # used to compile the source code for the kernel set named "kernel". KCONFIG_MAP := @kconfig_map@ # The operating system name, which should be either 'Linux' or 'Darwin'. OS_NAME := @os_name@ # Check for whether the operating system is Windows. IS_WIN := @is_win@ IS_MSVC := @is_msvc@ # The directory path to the top level of the source distribution. When # building in-tree, this path is ".". When building out-of-tree, this path # is path used to identify the location of configure. We also allow the # includer of config.mk to override this value by setting DIST_PATH prior # to including this file. This override option is employed, for example, # when common.mk (and therefore config.mk) is included by the Makefile # local to the 'testsuite' directory, or the 'test' directory containing # individual test drivers. ifeq ($(strip $(DIST_PATH)),) DIST_PATH := @dist_path@ endif # The C compiler. CC_VENDOR := @CC_VENDOR@ CC := @CC@ # Important C compiler ranges. GCC_OT_4_9_0 := @gcc_older_than_4_9_0@ GCC_OT_6_1_0 := @gcc_older_than_6_1_0@ GCC_OT_9_1_0 := @gcc_older_than_9_1_0@ GCC_OT_10_3_0 := @gcc_older_than_10_3_0@ CLANG_OT_9_0_0 := @clang_older_than_9_0_0@ CLANG_OT_12_0_0 := @clang_older_than_12_0_0@ AOCC_OT_2_0_0 := @aocc_older_than_2_0_0@ AOCC_OT_3_0_0 := @aocc_older_than_3_0_0@ # The C++ compiler. NOTE: A C++ is typically not needed. CXX := @CXX@ # Static library indexer. RANLIB := @RANLIB@ # Archiver. AR := @AR@ # Python Interpreter PYTHON := @PYTHON@ # Preset (required) CFLAGS and LDFLAGS. These variables capture the value # of the CFLAGS and LDFLAGS environment variables at configure-time (and/or # the value of CFLAGS/LDFLAGS if either was specified on the command line). # These flags are used in addition to the flags automatically determined # by the build system. CFLAGS_PRESET := @cflags_preset@ LDFLAGS_PRESET := @ldflags_preset@ # The level of debugging info to generate. DEBUG_TYPE := @debug_type@ ENABLE_DEBUG := @enable_debug@ # Whether to compile and link the AddressSanitizer library. MK_ENABLE_ASAN := @enable_asan@ # Whether operating system support was requested via --enable-system. ENABLE_SYSTEM := @enable_system@ # The requested threading model(s). THREADING_MODEL := @threading_model@ # Whether the compiler supports "#pragma omp simd" via the -fopenmp-simd option. PRAGMA_OMP_SIMD := @pragma_omp_simd@ # The installation prefix, exec_prefix, libdir, includedir, and shareddir # values from configure tell us where to install the libraries, header files, # and public makefile fragments. We must first assign each substituted # @anchor@ to its own variable. Why? Because the subsitutions may contain # unevaluated variable expressions. For example, '@libdir@' may be replaced # with '${exec_prefix}/lib'. By assigning the anchors to variables first, and # then assigning them to their final INSTALL_* variables, we allow prefix and # exec_prefix to be used in the definitions of exec_prefix, libdir, # includedir, and sharedir. prefix := @prefix@ exec_prefix := @exec_prefix@ libdir := @libdir@ includedir := @includedir@ sharedir := @sharedir@ # Notice that we support the use of DESTDIR so that advanced users may install # to a temporary location. INSTALL_LIBDIR := $(DESTDIR)$(libdir) INSTALL_INCDIR := $(DESTDIR)$(includedir) INSTALL_SHAREDIR := $(DESTDIR)$(sharedir) #$(info prefix = $(prefix) ) #$(info exec_prefix = $(exec_prefix) ) #$(info libdir = $(libdir) ) #$(info includedir = $(includedir) ) #$(info sharedir = $(sharedir) ) #$(error .) # Whether to output verbose command-line feedback as the Makefile is # processed. ENABLE_VERBOSE := @enable_verbose@ # Whether we are building out-of-tree. BUILDING_OOT := @configured_oot@ # Whether we need to employ an alternate method for passing object files to # ar and/or the linker to work around a small value of ARG_MAX. ARG_MAX_HACK := @enable_arg_max_hack@ # Whether to build the static and shared libraries. # NOTE: The "MK_" prefix, which helps differentiate these variables from # their corresonding cpp macros that use the BLIS_ prefix. MK_ENABLE_STATIC := @enable_static@ MK_ENABLE_SHARED := @enable_shared@ # Whether to use an install_name based on @rpath. MK_ENABLE_RPATH := @enable_rpath@ # Whether to export all symbols within the shared library, even those symbols # that are considered to be for internal use only. EXPORT_SHARED := @export_shared@ # Whether to enable either the BLAS or CBLAS compatibility layers. MK_ENABLE_BLAS := @enable_blas@ MK_ENABLE_CBLAS := @enable_cblas@ # Whether libblis will depend on libmemkind for certain memory allocations. MK_ENABLE_MEMKIND := @enable_memkind@ # The names of the addons to include when building BLIS. If empty, no addons # will be included. ADDON_LIST := @addon_list@ # The name of a sandbox defining an alternative gemm implementation. If empty, # no sandbox will be used and the conventional gemm implementation will remain # enabled. SANDBOX := @sandbox@ # The name of the pthreads library. If --disable-system was given, then this # variable is set to the empty value. LIBPTHREAD := @libpthread@ # Whether we should use AMD-customized versions of certain framework files. ENABLE_AMD_FRAME_TWEAKS := @enable_amd_frame_tweaks@ # end of ifndef CONFIG_MK_INCLUDED conditional block endif blis-1.1/build/detect/000077500000000000000000000000001474157777200147005ustar00rootroot00000000000000blis-1.1/build/detect/android/000077500000000000000000000000001474157777200163205ustar00rootroot00000000000000blis-1.1/build/detect/android/bionic.h000066400000000000000000000032511474157777200177350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Detect Bionic on Android */ #if __BIONIC__ bionic #endif blis-1.1/build/detect/config/000077500000000000000000000000001474157777200161455ustar00rootroot00000000000000blis-1.1/build/detect/config/config_detect.c000066400000000000000000000062361474157777200211150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: This file will likely only ever get compiled as part of the BLIS // configure script, and therefore BLIS_CONFIGURETIME_CPUID is guaranteed to // be #defined. However, we preserve the cpp conditional for consistency with // the other three files mentioned above. #ifdef BLIS_CONFIGURETIME_CPUID // NOTE: If you need to make any changes to this cpp branch, it's probably // the case that you also need to modify bli_arch.c, bli_cpuid.c, and // bli_env.c. Don't forget to update these other files as needed! // The BLIS_ENABLE_SYSTEM macro must be defined so that the correct cpp // branch in bli_system.h is processed. (This macro is normally defined in // bli_config.h.) #define BLIS_ENABLE_SYSTEM // Use C-style static inline functions for any static inline functions that // happen to be defined by the headers below. (This macro is normally defined // in bli_config_macro_defs.h.) #define BLIS_INLINE static // Since we're not building a shared library, we can forgo the use of the // BLIS_EXPORT_BLIS annotations by #defining them to be nothing. (This macro // is normally defined in bli_config_macro_defs.h.) #define BLIS_EXPORT_BLIS #include "bli_system.h" #include "bli_type_defs.h" #include "bli_arch.h" #include "bli_cpuid.h" //#include "bli_env.h" #else #include "blis.h" #endif int main( int argc, char** argv ) { arch_t id = bli_cpuid_query_id(); const char* s = bli_arch_string( id ); printf( "%s\n", s ); return 0; } blis-1.1/build/detect/config/old/000077500000000000000000000000001474157777200167235ustar00rootroot00000000000000blis-1.1/build/detect/config/old/arch_detect.c000066400000000000000000000003131474157777200213310ustar00rootroot00000000000000#if defined(__i386) || defined(_X86) ARCH_X86 #endif #if defined(__x86_64__) || defined(__amd64__) ARCH_X86_64 #endif #if defined(__arm__) ARCH_ARM #endif #if defined(__aarch64__) ARCH_AARCH64 #endif blis-1.1/build/detect/config/old/auto-detect.sh000077500000000000000000000060371474157777200215060ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2015, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # auto-detect.sh # # Zhang Xianyi # main() { if [ clang -v > /dev/null 2>&1 ]; then CC=clang else CC=gcc fi CPUID_SRC=cpuid_x86.c CPUID_BIN=blis_cpu_detect ARCH=generic # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The path to the script. We need this to find the top-level directory # of the source distribution in the event that the user has chosen to # build elsewhere. dist_path=${0%/${script_name}} # The path to the directory in which we are building. We do this to # make explicit that we distinguish between the top-level directory # of the distribution and the directory in which we are building. cur_dirpath="." # # Detect architecture by predefined macros # out1=`$CC -E ${dist_path}/arch_detect.c` ARCH=`echo $out1 | grep -o "ARCH_[a-zA-Z0-9_]*" | head -n1` if [ $ARCH = "ARCH_X86_64" ]; then CPUID_SRC=cpuid_x86.c elif [ $ARCH = "ARCH_X86" ]; then CPUID_SRC=cpuid_x86.c elif [ $ARCH = "ARCH_ARM" ]; then CPUID_SRC=cpuid_arm.c elif [ $ARCH = "ARCH_AARCH64" ]; then # Only support armv8 now echo "armv8a" return 0 else echo "generic" return 0 fi # # Detect CPU cores # $CC -o ${cur_dirpath}/$CPUID_BIN ${dist_path}/$CPUID_SRC ${cur_dirpath}/$CPUID_BIN rm -rf ${cur_dirpath}/$CPUID_BIN # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/detect/config/old/cpuid_arm.c000066400000000000000000000071521474157777200210370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2015, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #define CPUNAME_REFERENCE 0 #define CPUNAME_ARMV7 1 #define CPUNAME_CORTEXA9 2 #define CPUNAME_CORTEXA15 3 static char *cpuname[] = { "reference", "armv7a", "cortex-a9", "cortex-a15", }; int get_feature(char *search) { FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL; infile = fopen("/proc/cpuinfo", "r"); if (infile == NULL) { return 0; } while (fgets(buffer, sizeof(buffer), infile)) { if (!strncmp("Features", buffer, 8)) { p = strchr(buffer, ':') + 2; break; } } fclose(infile); if( p == NULL ) return 0; t = strtok(p," "); if (t != NULL) { if (!strcmp(t, search)) { return 1; } } while( t = strtok(NULL," ")){ if (!strcmp(t, search)) { return 1; } } return 0; } int cpu_detect(void) { FILE *infile; char buffer[512], *p; p = (char *) NULL ; infile = fopen("/proc/cpuinfo", "r"); if (infile == NULL) { return CPUNAME_REFERENCE; } while (fgets(buffer, sizeof(buffer), infile)) { if (!strncmp("CPU part", buffer, 8)) { p = strchr(buffer, ':') + 2; break; } } fclose(infile); if(p != NULL) { if (strstr(p, "0xc09")) { if(get_feature("neon")) return CPUNAME_CORTEXA9; else return CPUNAME_ARMV7; } if (strstr(p, "0xc0f")) { if(get_feature("neon")) return CPUNAME_CORTEXA15; else return CPUNAME_ARMV7; } } p = (char *) NULL ; infile = fopen("/proc/cpuinfo", "r"); if (infile == NULL) { return CPUNAME_REFERENCE; } while (fgets(buffer, sizeof(buffer), infile)) { if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) { p = strchr(buffer, ':') + 2; break; } } fclose(infile); if(p != NULL) { if (strstr(p, "ARMv7")) { return CPUNAME_ARMV7; } } return CPUNAME_REFERENCE; } int main() { int cpuname_id; cpuname_id=cpu_detect(); printf("%s\n", cpuname[cpuname_id]); return 0; } blis-1.1/build/detect/config/old/cpuid_x86.c000066400000000000000000000155471474157777200207140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2015, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #define VENDOR_UNKNOWN 0 #define VENDOR_INTEL 1 #define VENDOR_AMD 2 #define CPUNAME_GENERIC 0 #define CPUNAME_PENRYN 1 #define CPUNAME_SANDYBRIDGE 2 #define CPUNAME_HASWELL 3 #define CPUNAME_KNC 4 #define CPUNAME_KNL 5 #define CPUNAME_BULLDOZER 6 #define CPUNAME_PILEDRIVER 7 #define CPUNAME_STEAMROLLER 8 #define CPUNAME_EXCAVATOR 9 #define CPUNAME_ZEN 10 static char *cpuname[] = { "generic", "penryn", "sandybridge", "haswell", "knc", "knl", "bulldozer", "piledriver", "steamroller", "excavator", "zen", }; #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ #if defined(__i386__) && defined(__PIC__) __asm__ __volatile__ ("mov %%ebx, %%edi;" "cpuid;" "xchgl %%ebx, %%edi;" : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); #else __asm__ __volatile__ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); #endif } static inline int have_cpuid(void){ int eax, ebx, ecx, edx; cpuid(0, &eax, &ebx, &ecx, &edx); return eax; } int get_vendor(void){ int eax, ebx, ecx, edx; char vendor[13]; cpuid(0, &eax, &ebx, &ecx, &edx); *(int *)(&vendor[0]) = ebx; *(int *)(&vendor[4]) = edx; *(int *)(&vendor[8]) = ecx; vendor[12] = (char)0; if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL; if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD; if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL; return VENDOR_UNKNOWN; } static inline void xgetbv(int op, int * eax, int * edx){ //Use binary code for xgetbv __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); } int support_avx(){ int eax, ebx, ecx, edx; int ret=0; cpuid(1, &eax, &ebx, &ecx, &edx); if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ xgetbv(0, &eax, &edx); if((eax & 6) == 6){ ret=1; //OS support AVX } } return ret; } int support_avx512(){ int eax, ebx, ecx, edx; int ret=0; cpuid(1, &eax, &ebx, &ecx, &edx); if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ xgetbv(0, &eax, &edx); if((eax & 0xE6) == 0xE6){ ret=1; //OS support AVX-512 } } return ret; } int cpu_detect() { int eax, ebx, ecx, edx; int vendor, family, extend_family, model, extend_model; if ( !have_cpuid() ) return CPUNAME_GENERIC; vendor = get_vendor(); cpuid( 1, &eax, &ebx, &ecx, &edx ); extend_family = BITMASK( eax, 20, 0xff ); extend_model = BITMASK( eax, 16, 0x0f ); family = BITMASK( eax, 8, 0x0f ); model = BITMASK( eax, 4, 0x0f ); if (vendor == VENDOR_INTEL){ model |= extend_model<<4; switch (family) { case 0x6: switch (model) { case 0x0F: //Core2 case 0x16: //Core2 case 0x17: //Penryn case 0x1D: //Penryn case 0x1A: //Nehalem case 0x1E: //Nehalem case 0x2E: //Nehalem case 0x25: //Westmere case 0x2C: //Westmere case 0x2F: //Westmere return CPUNAME_PENRYN; case 0x2A: //Sandy Bridge case 0x2D: //Sandy Bridge case 0x3A: //Ivy Bridge case 0x3E: //Ivy Bridge if(support_avx()) { return CPUNAME_SANDYBRIDGE; }else{ return CPUNAME_GENERIC; //OS doesn't support AVX } case 0x3C: //Haswell case 0x3F: //Haswell case 0x3D: //Broadwell case 0x47: //Broadwell case 0x4F: //Broadwell case 0x56: //Broadwell case 0x4E: //Skylake case 0x5E: //Skylake if(support_avx()) { return CPUNAME_HASWELL; }else{ return CPUNAME_GENERIC; //OS doesn't support AVX } case 0x57: //KNL if(support_avx512()) { return CPUNAME_KNL; }else{ return CPUNAME_GENERIC; //OS doesn't support AVX } } break; case 0xB: switch (model) { case 0x01: //KNC return CPUNAME_KNC; } } }else if (vendor == VENDOR_AMD){ switch (family) { case 0xf: switch (extend_family) { case 6: switch (model) { case 1: if(support_avx()) return CPUNAME_BULLDOZER; else return CPUNAME_GENERIC; //OS don't support AVX. case 2: if(support_avx()) return CPUNAME_PILEDRIVER; else return CPUNAME_GENERIC; //OS don't support AVX. case 0: // Steamroller. Temp use Piledriver. if(support_avx()) return CPUNAME_STEAMROLLER; else return CPUNAME_GENERIC; //OS don't support AVX. } case 8: switch (model){ case 1: if(support_avx()) return CPUNAME_ZEN; else return CPUNAME_REFERENCE; //OS don't support AVX. } } break; } } return CPUNAME_GENERIC; } int main() { int cpuname_id; cpuname_id=cpu_detect(); printf("%s\n", cpuname[cpuname_id]); return 0; } blis-1.1/build/detect/iset/000077500000000000000000000000001474157777200156445ustar00rootroot00000000000000blis-1.1/build/detect/iset/avx.s000066400000000000000000000001431474157777200166240ustar00rootroot00000000000000// // Test for AVX instruction set. // vzeroall vmovapd %ymm0, %ymm1 vmulpd %ymm0, %ymm0, %ymm1 blis-1.1/build/detect/iset/avx512dq.s000066400000000000000000000001561474157777200174050ustar00rootroot00000000000000// // Test for AVX-512dq instruction set. // vzeroall vpmullq %zmm0, %zmm0, %zmm1 vpmullw %zmm0, %zmm0, %zmm1 blis-1.1/build/detect/iset/avx512f.s000066400000000000000000000002321474157777200172210ustar00rootroot00000000000000// // Test for AVX-512f instruction set. // vzeroall vmovapd %zmm0, %zmm1 vmulpd %zmm0, %zmm0, %zmm1 vfmadd213pd 0x400(%rax,%rsi,8) {1to8}, %zmm1, %zmm2 blis-1.1/build/detect/iset/fma3.s000066400000000000000000000001211474157777200166500ustar00rootroot00000000000000// // Test for FMA3 instruction set. // vzeroall vfmadd213pd %ymm0, %ymm1, %ymm2 blis-1.1/build/detect/iset/fma4.s000066400000000000000000000001521474157777200166550ustar00rootroot00000000000000// // Test for FMA4 instruction set (AMD Bulldozer only). // vzeroall vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3 blis-1.1/build/detect/memkind/000077500000000000000000000000001474157777200163245ustar00rootroot00000000000000blis-1.1/build/detect/memkind/libmemkind_detect.c000066400000000000000000000002761474157777200221400ustar00rootroot00000000000000#include #include int main( int argc, char **argv ) { void* p = hbw_malloc( 4096 ); printf( "%s: hbw_malloc() returned %p\n", __FILE__, p ); return 0; } blis-1.1/build/detect/omp_simd/000077500000000000000000000000001474157777200165075ustar00rootroot00000000000000blis-1.1/build/detect/omp_simd/omp_simd_detect.c000066400000000000000000000007311474157777200220130ustar00rootroot00000000000000#include #include #define ARRAY_LEN 4096 double x[ ARRAY_LEN ]; double y[ ARRAY_LEN ]; int main( int argc, char **argv ) { const double alpha = 2.1; for ( int i = 0; i < ARRAY_LEN; ++i ) { y[ i ] = 0.0; x[ i ] = 1.0; } #pragma omp simd for ( int i = 0; i < ARRAY_LEN; ++i ) { y[ i ] += alpha * x[ i ]; } #if 0 _Pragma( "omp simd" ) for ( int i = 0; i < ARRAY_LEN; ++i ) { x[ i ] += alpha * y[ i ]; } #endif return 0; } blis-1.1/build/detect/riscv/000077500000000000000000000000001474157777200160265ustar00rootroot00000000000000blis-1.1/build/detect/riscv/bli_riscv_cpuid.h000066400000000000000000000052631474157777200213450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* RISC-V autodetection code which works with native or cross-compilers. Compile with $CC -E and ignore all output lines starting with #. On RISC-V it may return rv32i (base 32-bit integer RISC-V), rv32iv (rv32i plus vector extensions), rv64i (base 64-bit integer RISC-V), or rv64iv (rv64i plus vector extensions). On 128-bit integer RISC-V, it falls back to generic for now. For toolchains which do not yet support RISC-V feature-detection macros, it will fall back on generic, so the BLIS configure script may need the RISC-V configuration to be explicitly specified. */ // false if !defined(__riscv) || !defined(__riscv_xlen) #if __riscv && __riscv_xlen == 64 #if __riscv_vector // false if !defined(__riscv_vector) rv64iv #else rv64i #endif // false if !defined(__riscv) || !defined(__riscv_xlen) || __riscv_e32 != 0 #elif __riscv && __riscv_xlen == 32 && !__riscv_e32 #if __riscv_vector // false if !defined(__riscv_vector) rv32iv #else rv32i #endif #else generic // fall back on BLIS runtime CPUID autodetection algorithm #endif blis-1.1/build/detect/riscv/bli_riscv_detect_abi.h000066400000000000000000000042311474157777200223160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Construct a RISC-V ABI string based on available features. */ #if __riscv #define CAT2(a,b) a##b #define CAT(a,b) CAT2(a,b) #if __riscv_xlen == 32 #define RISCV_INT_ABI ilp32 #else #define RISCV_INT_ABI lp64 #endif #if __riscv_abi_rve CAT(RISCV_INT_ABI, e) #elif __riscv_float_abi_soft RISCV_INT_ABI #elif __riscv_float_abi_single CAT(RISCV_INT_ABI, f) #elif __riscv_float_abi_double CAT(RISCV_INT_ABI, d) #elif __riscv_float_abi_quad CAT(RISCV_INT_ABI, q) #else #error "Unknown RISC-V ABI" #endif #endif /* __riscv */ blis-1.1/build/detect/riscv/bli_riscv_detect_arch.h000066400000000000000000000107171474157777200225060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Construct a RISC-V architecture string based on available features. */ #if __riscv #if __riscv_arch_test #if __riscv_i #define RISCV_I i #else #define RISCV_I #endif #if __riscv_e #define RISCV_E e #else #define RISCV_E #endif #if __riscv_m #define RISCV_M m #else #define RISCV_M #endif #if __riscv_a #define RISCV_A a #else #define RISCV_A #endif #if __riscv_f #define RISCV_F f #else #define RISCV_F #endif #if __riscv_d #define RISCV_D d #else #define RISCV_D #endif #if __riscv_flen >= 128 #define RISCV_Q q #else #define RISCV_Q #endif #if __riscv_c #define RISCV_C c #else #define RISCV_C #endif #if __riscv_p #define RISCV_P p #else #define RISCV_P #endif /* FORCE_RISCV_VECTOR is a Clang workaround */ #if __riscv_v || FORCE_RISCV_VECTOR #define RISCV_V v #else #define RISCV_V #endif /* No test currently for Zicsr, which was removed from the base ISA, but F implies Zicsr */ #if __riscv_f #define RISCV_ZICSR _zicsr #else #define RISCV_ZICSR #endif /* No test currently for Zifencei, which was removed from the base ISA */ #define RISCV_ZIFENCEI #if __riscv_zba #define RISCV_ZBA _zba #else #define RISCV_ZBA #endif #if __riscv_zbb #define RISCV_ZBB _zbb #else #define RISCV_ZBB #endif #if __riscv_zbc #define RISCV_ZBC _zbc #else #define RISCV_ZBC #endif #if __riscv_zbs #define RISCV_ZBS _zbs #else #define RISCV_ZBS #endif #if __riscv_zfh #define RISCV_ZFH _zfh #else #define RISCV_ZFH #endif #else /* __riscv_arch_test */ /* We assume I and E are exclusive when __riscv_arch_test isn't defined */ #if __riscv_32e #define RISCV_I #define RISCV_E e #else #define RISCV_I i #define RISCV_E #endif #if __riscv_mul #define RISCV_M m #else #define RISCV_M #endif #if __riscv_atomic #define RISCV_A a #else #define RISCV_A #endif #if __riscv_flen >= 32 #define RISCV_F f #else #define RISCV_F #endif #if __riscv_flen >= 64 #define RISCV_D d #else #define RISCV_D #endif #if __riscv_flen >= 128 #define RISCV_Q q #else #define RISCV_Q #endif #if __riscv_compressed #define RISCV_C c #else #define RISCV_C #endif #define RISCV_P /* FORCE_RISCV_VECTOR is a Clang workaround */ #if __riscv_vector || FORCE_RISCV_VECTOR #define RISCV_V v #else #define RISCV_V #endif /* No test currently for Zicsr, which was removed from the base ISA, but F implies Zicsr */ #if __riscv_flen >= 32 #define RISCV_ZICSR _zicsr #else #define RISCV_ZICSR #endif #define RISCV_ZIFENCEI #define RISCV_ZBA #define RISCV_ZBB #define RISCV_ZBC #define RISCV_ZBS #define RISCV_ZFH #endif /* __riscv_arch_test */ #define CAT2(a,b) a##b #define CAT(a,b) CAT2(a,b) CAT(rv, CAT(__riscv_xlen, CAT(RISCV_I, CAT(RISCV_E, CAT(RISCV_M, CAT(RISCV_A, CAT(RISCV_F, CAT(RISCV_D, CAT(RISCV_Q, CAT(RISCV_C, CAT(RISCV_P, CAT(RISCV_V, CAT(RISCV_ZICSR, CAT(RISCV_ZIFENCEI, CAT(RISCV_ZBA, CAT(RISCV_ZBB, CAT(RISCV_ZBC, CAT(RISCV_ZBS, RISCV_ZFH)))))))))))))))))) #endif /* __riscv */ blis-1.1/build/flatten-headers.py000077500000000000000000000405461474157777200170640ustar00rootroot00000000000000#!/usr/bin/env python # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Import modules import os import sys import getopt import re def print_usage(): my_print( " " ) my_print( " %s" % script_name ) my_print( " " ) my_print( " Field G. Van Zee" ) my_print( " " ) my_print( " Generate a monolithic header by recursively replacing all #include" ) my_print( " directives in a selected file with the contents of the header files" ) my_print( " they reference." ) my_print( " " ) my_print( " Usage:" ) my_print( " " ) my_print( " %s header header_out temp_dir dir_list" % script_name ) my_print( " " ) my_print( " Arguments:" ) my_print( " " ) my_print( " header The filepath to the top-level header, which is the file" ) my_print( " that will #include all other header files." ) my_print( " " ) my_print( " header_out The filepath of the file into which the script will output" ) my_print( " the monolithic header." ) my_print( " " ) my_print( " temp_dir A directory in which temporary files may be created." ) my_print( " NOTE: No temporary files are created in the current" ) my_print( " implementation, but this argument must still be specified." ) my_print( " " ) my_print( " dir_list The list of directory paths in which to search for the" ) my_print( " headers that are #included by 'header'. By default, these" ) my_print( " directories are scanned for .h files, but sub-directories" ) my_print( " within the various directories are not inspected. If the" ) my_print( " -r option is given, these directories are recursively" ) my_print( " scanned. In either case, the subset of directories scanned" ) my_print( " that actually contains .h files is then searched whenever" ) my_print( " a #include directive is encountered in 'header' (or any" ) my_print( " file subsequently #included). If a referenced header file" ) my_print( " is not found, the #include directive is left untouched and" ) my_print( " translated directly into 'header_out'." ) my_print( " " ) my_print( " The following options are accepted:" ) my_print( " " ) my_print( " -r recursive" ) my_print( " Scan the directories listed in 'dir_list' recursively when" ) my_print( " searching for .h header files. By default, the directories" ) my_print( " are not searched recursively." ) my_print( " " ) my_print( " -c strip C-style comments" ) my_print( " Strip comments enclosed in /* */ delimiters from the" ) my_print( " output, including multi-line comments. By default, C-style" ) my_print( " comments are not stripped." ) my_print( " " ) my_print( " -o SCRIPT output script name" ) my_print( " Use SCRIPT as a prefix when outputting messages instead" ) my_print( " the script's actual name. Useful when the current script" ) my_print( " is going to be called from within another, higher-level" ) my_print( " driver script and seeing the current script's name might" ) my_print( " unnecessarily confuse the user." ) my_print( " " ) my_print( " -v [0|1|2] verboseness level" ) my_print( " level 0: silent (no output)" ) my_print( " level 1: default (single character '.' per header)" ) my_print( " level 2: verbose (several lines per header)." ) my_print( " " ) my_print( " -h help" ) my_print( " Output this information and exit." ) my_print( " " ) # ------------------------------------------------------------------------------ def canonicalize_ws( s ): return re.sub( r'\s+', ' ', s ).strip() # --- def my_print( s ): sys.stdout.write( "%s\n" % s ) # --- #def echov1( s ): # # if verbose_flag == "1": # print "%s: %s" % ( output_name, s ) def echov1_n( s ): if verbose_flag == "1": sys.stdout.write( s ) sys.stdout.flush() def echov1_n2( s ): if verbose_flag == "1": sys.stdout.write( "%s\n" % s ) sys.stdout.flush() # --- def echov2( s ): if verbose_flag == "2": sys.stdout.write( "%s: %s\n" % ( output_name, s ) ) sys.stdout.flush() def echov2_n( s ): if verbose_flag == "2": sys.stdout.write( output_name ) sys.stdout.write( ": " ) sys.stdout.write( s ) sys.stdout.flush() def echov2_n2( s ): if verbose_flag == "2": sys.stdout.write( "%s\n" % s ) sys.stdout.flush() # ------------------------------------------------------------------------------ def list_contains_header( items ): rval = False for item in items: is_h = re.search( r"\.h", item ) if is_h: rval = True break return rval # ------------------------------------------------------------------------------ def get_header_path( filename, header_dirpaths ): filepath = None # Search each directory path for the filename given. for dirpath in header_dirpaths: # Construct a possible path to the sought-after file. cur_filepath = "%s/%s" % ( dirpath, filename ) # Check whether the file exists. found = os.path.exists( cur_filepath ) if found: filepath = cur_filepath break return filepath # ------------------------------------------------------------------------------ def strip_cstyle_comments( string ): return re.sub( r"/\*.*?\*/", "", string, flags=re.S ) # ------------------------------------------------------------------------------ def flatten_header( inputfile, header_dirpaths, cursp ): # This string is inserted after #include directives after having # determined that they are not present in the directory tree. skipstr = "// skipped" beginstr = "// begin " endstr = "// end " ostring = "" # Open the input file to process. ifile = open( inputfile, "r" ) # A counter to track the line number being parsed within the current file. # This counter, when selectively encoded into the flattened header via #line # directives, facilitates easier debugging. (When the compiler finds an # issue, it will be able to refer to the line number within the constituent # header file rather than the flattened one.) lineno = 0 # Iterate over the lines in the file. while True: # Increment the line number. lineno += 1 # Read a line in the file. line = ifile.readline() # Check for EOF. if line == '': break # Check for the #include directive and isolate the header name within # a group (parentheses). #result = re.search( '^[\s]*#include (["<])([\w\.\-/]*)([">])', line ) result = regex.search( line ) # If the line contained a #include directive, we must try to replace # it with the contents of the header referenced by the directive. if result: # Extract the header file referenced in the #include directive, # saved as the second group in the regular expression # above. header = result.group(2) echov2( "%sfound reference to '%s'." % ( cursp, header ) ) # Search for the path to the header referenced in the #include # directive. header_path = get_header_path( header, header_dirpaths ) # First, check if the header is our root header (and if so, ignore it). # Otherwise, if the header was found, we recurse. Otherwise, we output # the #include directive with a comment indicating that it as skipped if header == root_inputfile: markl = result.group(1) markr = result.group(3) echov2( "%sthis is the root header '%s'; commenting out / skipping." \ % ( cursp, header ) ) # If the header found is our root header, then we cannot # recurse into it lest we enter an infinite loop. Output the # line but make sure it's commented out entirely. ostring += "%s #include %c%s%c %c" \ % ( skipstr, markl, header, markr, '\n' ) elif header_path: echov2( "%slocated file '%s'; recursing." \ % ( cursp, header_path ) ) # Mark the beginning of the header being inserted. ostring += "%s%s%c" % ( beginstr, header, '\n' ) if line_numbers: ostring += "#line %d \"%s\"%c\n" % ( 1, header_path, '\n' ) # Recurse on the header, accumulating the string. ostring += flatten_header( header_path, header_dirpaths, cursp + " " ) # Mark the end of the header being inserted. ostring += "%s%s%c" % ( endstr, header, '\n' ) if line_numbers: ostring += "#line %d \"%s\"%c\n" % ( lineno+1, inputfile, '\n' ) echov2( "%sheader file '%s' fully processed." \ % ( cursp, header_path ) ) else: markl = result.group(1) markr = result.group(3) echov2( "%scould not locate file '%s'; marking as skipped." \ % ( cursp, header ) ) # If the header was not found, output the line with a # comment that the header was skipped. ostring += "#include %c%s%c %s%c" \ % ( markl, header, markr, skipstr, '\n' ) # endif else: # If the line did not contain a #include directive, simply output # the line verbatim. ostring += "%s" % line # endif # endwhile # Close the input file. ifile.close() echov1_n( "." ) return ostring # ------------------------------------------------------------------------------ def find_header_dirs( dirpath ): header_dirpaths = [] for root, dirs, files in os.walk( dirpath, topdown=True ): echov2_n( "scanning contents of %s" % root ) if list_contains_header( files ): echov2_n2( "...found headers" ) header_dirpaths.append( root ) else: echov2_n2( "" ) #endif #endfor return header_dirpaths # ------------------------------------------------------------------------------ # Global variables. script_name = None output_name = None strip_comments = None recursive_flag = None line_numbers = None verbose_flag = None regex = None root_inputfile = None def main(): global script_name global output_name global strip_comments global recursive_flag global line_numbers global verbose_flag global regex global root_inputfile # Obtain the script name. path, script_name = os.path.split(sys.argv[0]) output_name = script_name strip_comments = False recursive_flag = False line_numbers = False verbose_flag = "1" nestsp = " " # Process our command line options. try: opts, args = getopt.getopt( sys.argv[1:], "o:rclhv:" ) except getopt.GetoptError as err: # print help information and exit: my_print( str(err) ) # will print something like "option -a not recognized" print_usage() sys.exit(2) for opt, optarg in opts: if opt == "-o": output_name = optarg elif opt == "-r": recursive_flag = True elif opt == "-l": line_numbers = True elif opt == "-c": strip_comments = True elif opt == "-v": verbose_flag = optarg elif opt == "-h": print_usage() sys.exit() else: print_usage() sys.exit() if line_numbers and strip_comments: my_print( "WARNING: stripping comments will result in inaccurate line numbers" ) # Make sure that the verboseness level is valid. if ( verbose_flag != "0" and verbose_flag != "1" and verbose_flag != "2" ): my_print( "%s Invalid verboseness argument: %s" \ % output_name, verbose_flag ) sys.exit() # Print usage if we don't have exactly four arguments. if len( args ) != 4: print_usage() sys.exit() # Acquire the four required arguments: # - the input header file, # - the output header file, # - the temporary directory in which we can write intermediate files, # - the list of directories in which to search for the headers. inputfile = args[0] outputfile = args[1] temp_dir = args[2] dir_list = args[3] # Save the filename (basename) part of the input file (or root file) into a # global variable that we can access later from within flatten_header(). root_inputfile = os.path.basename( inputfile ) # Separate the directories into distinct strings. dir_list = dir_list.split() # First, confirm that the directories in dir_list are valid. dir_list_checked = [] for item in dir_list: #absitem = os.path.abspath( item ) echov2_n( "checking " + item ) if os.path.exists( item ): dir_list_checked.append( item ) echov2_n2( "...directory exists." ) else: echov2_n2( "...invalid directory; omitting." ) # endfor # Overwrite the original dir_list with the updated copy that omits # invalid directories. dir_list = dir_list_checked echov2( "check summary:" ) echov2( " accessible directories:" ) echov2( " %s" % ' '.join( dir_list ) ) # Generate a list of directories (header_dirpaths) which will be searched # whenever a #include directive is encountered. The method by which # header_dirpaths is compiled will depend on whether the recursive flag # was given. if recursive_flag: header_dirpaths = [] for d in dir_list: # For each directory in dir_list, recursively walk that directory # and return a list of directories that contain headers. d_dirpaths = find_header_dirs( d ) # Add the list resulting from the current search to the running # list of directory paths that contain headers. header_dirpaths += d_dirpaths # endfor else: # If the recursive flag was not given, we can just use dir_list # as-is, though we opt to filter out the directories that don't # contain .h files. header_dirpaths = [] for d in dir_list: echov2_n( "scanning %s" % d ) # Acquire a list of the directory's contents. sub_items = os.listdir( d ) # If there is at least one header present, add the current # directory to the list of header directories. if list_contains_header( sub_items ): header_dirpaths.append( d ) echov2_n2( "...found headers." ) else: echov2_n2( "...no headers found." ) # endif # endfor # endfor echov2( "scan summary:" ) echov2( " headers found in:" ) echov2( " %s" % ' '.join( header_dirpaths ) ) echov2( "preparing to monolithify '%s'" % inputfile ) echov2( "new header will be saved to '%s'" % outputfile ) echov1_n( "." ) # Open the output file. ofile = open( outputfile, "w" ) # Precompile the main regular expression used to isolate #include # directives and the headers they reference. This regex object will # get reused over and over again in flatten_header(). regex = re.compile( r'^[\s]*#include (["<])([\w\.\-/]*)([">])' ) # Recursively substitute headers for occurrences of #include directives. final_string = flatten_header( inputfile, header_dirpaths, nestsp ) # Strip C-style comments from the final output, if requested. if strip_comments: final_string = strip_cstyle_comments( final_string ) # Write the lines to the file. ofile.write( final_string ) # Close the output file. ofile.close() echov2( "substitution complete." ) echov2( "monolithic header saved as '%s'" % outputfile ) echov1_n2( "." ) return 0 if __name__ == "__main__": main() blis-1.1/build/flatten-headers.sh000077500000000000000000000435711474157777200170470ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # -- Helper functions ---------------------------------------------------------- # print_usage() { # Echo usage info. echo " " echo " ${script_name}" echo " " echo " Field G. Van Zee" echo " " echo " Generate a monolithic header by recursively replacing all #include" echo " directives in a selected file with the contents of the header files" echo " they reference." echo " " echo " Usage:" echo " " echo " ${script_name} header header_out temp_dir dir_list" echo " " echo " Arguments:" echo " " echo " header The filepath to the top-level header, which is the file" echo " that will #include all other header files." echo " " echo " header_out The filepath of the file into which the script will output" echo " the monolithic header." echo " " echo " temp_dir A directory in which temporary files may be created." echo " " echo " dir_list The list of directory paths in which to search for the" echo " headers that are #included by 'header'. By default, these" echo " directories are scanned for .h files, but sub-directories" echo " within the various directories are not inspected. If the" echo " -r option is given, these directories are recursively" echo " scanned. In either case, the subset of directories scanned" echo " that actually contains .h files is then searched whenever" echo " a #include directive is encountered in 'header' (or any" echo " file subsequently #included). If a referenced header file" echo " is not found, the #include directive is left untouched and" echo " translated directly into 'header_out'." echo " " echo " The following options are accepted:" echo " " echo " -r recursive" echo " Scan the directories listed in 'dir_list' recursively when" echo " searching for .h header files. By default, the directories" echo " are not searched recursively." echo " " echo " -c strip C-style comments" echo " Strip comments enclosed in /* */ delimiters from the" echo " output, including multi-line comments. By default, C-style" echo " comments are not stripped." echo " " echo " -o SCRIPT output script name" echo " Use SCRIPT as a prefix when outputting messages instead" echo " the script's actual name. Useful when the current script" echo " is going to be called from within another, higher-level" echo " driver script and seeing the current script's name might" echo " unnecessarily confuse the user." echo " " echo " -v [0|1|2] verboseness level" echo " level 0: silent (no output)" echo " level 1: default (single character '.' per header)" echo " level 2: verbose (several lines per header)." echo " " echo " -h help" echo " Output this information and exit." echo " " # Exit with non-zero exit status exit 1 } canonicalize_ws() { local str="$1" # Remove leading and trailing whitespace. str=$(echo -e "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//') # Remove duplicate spaces between words. str=$(echo -e "${str}" | tr -s " ") # Update the input argument. echo "${str}" } is_word_in_list() { word="$1" list="$2" rval="" for item in ${list}; do if [ "${item}" == "${word}" ]; then rval="${word}" break fi done echo "${rval}" } echovo() { if [ "${verbose_flag}" == "1" ]; then # Echo the argument string to stderr instead of stdout. echo "${output_name}: $1" 1>&2; fi } echovo_n() { if [ "${verbose_flag}" == "1" ]; then # Echo the argument string to stderr instead of stdout. echo -n "$1" 1>&2; fi } echovo_n2() { if [ "${verbose_flag}" == "1" ]; then # Echo the argument string to stderr instead of stdout. echo "$1" 1>&2; fi } # --- echovt() { if [ "${verbose_flag}" == "2" ]; then # Echo the argument string to stderr instead of stdout. echo "${output_name}: $1" 1>&2; fi } echovt_n() { if [ "${verbose_flag}" == "2" ]; then # Echo the argument string to stderr instead of stdout. echo -n "${output_name}: $1" 1>&2; fi } echovt_n2() { if [ "${verbose_flag}" == "2" ]; then # Echo the argument string to stderr instead of stdout. echo "$1" 1>&2; fi } find_header_dirs() { local cur_dirpath sub_items result cur_list item child_list # Extract the argument: the current directory, and the list of # directories found so far that contain headers. cur_dirpath="$1" echovt_n "scanning contents of ${cur_dirpath}" # Acquire a list of the directory's contents. sub_items=$(ls ${cur_dirpath}) # If there is at least one header present, add the current directory to # the list header of directories. Otherwise, the current directory does # not contribute to the list returned to the caller. result=$(echo ${sub_items} | grep "\.h") if [ -n "${result}" ]; then cur_list="${cur_dirpath}" echovt_n2 " ...found headers" else cur_list="" echovt_n2 "" fi # Iterate over the list of directory contents. for item in ${sub_items}; do # Check whether the current item is in the ignore_list. If so, we # ignore it. result=$(is_word_in_list "${item}" "${ignore_list}") if [ -n "${result}" ]; then echovt "ignoring directory '${item}'." continue fi # If the current item is a directory, recursively accumulate header # directories for that sub-directory. if [ -d "${cur_dirpath}/${item}" ]; then # Recursively find header directories within the sub-directory # ${item} and store the directory list to child_list. child_list=$(find_header_dirs "${cur_dirpath}/${item}") # Accumulate the sub-directory's header list with the running list # of header directories cur_list="${cur_list} ${child_list}" fi done # Return the list of header directories. echo "${cur_list}" } get_header_path() { local filename dirpaths filepath filename="$1" dirpaths="$2" filepath="" # Search each directory path for the filename given. for dirpath in ${dirpaths}; do if [ -f "${dirpath}/${filename}" ]; then filepath="${dirpath}/${filename}" break fi done # Return the filepath that was found. Note that if no filepath was found # in the loop above, the empty string gets returned. echo "${filepath}" } replace_pass() { local inputfile dirpaths intermfile skipstr commstr result local header headerlist header_filepath header_esc subintermfile inputfile="$1" dirpaths="$2" cursp="$3" # Set the output filename, which we will return to the caller. Starting # with the input filepath, we strip it down to just the filename and # reconstruct it with the .interm suffix in temp_dir. intermfile="${inputfile##*/}" intermfile="${temp_dir}/${intermfile}.interm" # This string is inserted after #include directives after having # determined that they are not present in the directory tree. skipstr="\/\/ skipped" # Initialize the list of headers referenced in #include directives # found in the current header file. headerlist="" result=$(grep '^[[:space:]]*#include ' ${inputfile}) # Only iterate through the file line-by-line if it contains at least # one #include directive. If it does not contain any #include directives, # then we can leave headerlist initialized to empty and proceed. if [ -n "${result}" ]; then # Iterate through each line of the header file, accumulating the names of # header files referenced in #include directives. while read -r curline do # Check whether the line begins with a #include directive, but ignore # the line if it contains the skip string. result=$(echo ${curline} | grep '^[[:space:]]*#include ') # If the #include directive was found... if [ -n "${result}" ]; then # Isolate the header filename. We must take care to include all # characters that might appear between the "" or <>. header=$(echo ${curline} | sed -e "s/#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g") # Add the header file to a list. headerlist=$(canonicalize_ws "${headerlist} ${header}") fi done < "${inputfile}" fi if [ -n "${headerlist}" ]; then echovt "${cursp}found references to: ${headerlist}" else echovt "${cursp}no header references found." fi # Before we go any further, we strip C-style comments from the file, # if requested. if [ -n "${strip_comments}" ]; then # Make a copy of inputfile stripped of its C-style comments and # save it to intermfile. This substitution leaves behind a single # blank line. cat ${inputfile} \ | perl -0777 -pe "s/\/\*.*?\*\///gs" \ > "${intermfile}" else # Otherwise, just copy inputfile to intermfile verbatim. cp ${inputfile} ${intermfile} fi # Iterate over each header file found in the previous loop. for header in ${headerlist}; do # Find the path to the header. header_filepath=$(get_header_path ${header} "${dirpaths}") # If the header has a slash, escape it so that sed doesn't get confused # (since we use '/' as our search-and-replace delimiter). header_esc=$(echo "${header}" | sed -e 's/\//\\\//g') # If the header file was not found, get_header_path() returns an # empty string. This probably means that the header file is a # system header and thus we skip it since we don't want to inline # the contents of system headers anyway. if [ -z "${header_filepath}" ]; then echovt "${cursp}could not locate file '${header}'; marking as skipped." # Insert a comment after the #include so we know it was ignored. # Notice that we mimic the quotes or angle brackets around the # header name, whichever pair was used in the input. cat ${intermfile} \ | sed -e "s/^[[:space:]]*#include \([\"<]\)\(${header_esc}\)\([\">]\).*/#include \1\2\3 ${skipstr}/" \ > "${intermfile}.tmp" mv "${intermfile}.tmp" ${intermfile} else echovt "${cursp}located file '${header_filepath}'; recursing." # Recursively produce an inlined/flattened intermediate file at # ${header_filepath}. subintermfile=$(replace_pass ${header_filepath} "${dirpaths}" "${cursp}${nestsp}") echovt "${cursp}inserting '${subintermfile}'." # Replace the #include directive for the current header file with the # contents of that header file, saving the result to a temporary file. # We also insert begin and end markers to allow for more readability. # NOTE: We use the 'i\...' and 'a\...' notation with '$', which causes # bash to interpret '\n' as a newline, as needed for the 'a\' and 'i\' # commands in POSIX (e.g. OS X) sed. (GNU sed allows a much more # natural usage that does not require the backslash or newline.) cat ${intermfile} \ | sed -e "/^[[:space:]]*#include \"${header_esc}\"/ {" \ -e 'i\'$'\n'"// begin ${header}"$'\n' \ -e "r ${subintermfile}" \ -e 'a\'$'\n'"// end ${header}"$'\n' \ -e "d" \ -e "}" \ > "${intermfile}.tmp" mv "${intermfile}.tmp" ${intermfile} echovt "${cursp}removing intermediate file '${subintermfile}'." # Remove the recursive call's intermediate file now that it has been # inserted into this level's intermediate. rm "${subintermfile}" fi done # works, but leaves blank line: #cat "test.h" | sed -e "/^#include \"foo.h\"/r foo.h" -e "s///" > "test.new.h" # works: #cat "test.h" | sed -e '/^#include \"foo.h\"/ {' -e 'r foo.h' -e 'd' -e '}' > "test.new.h" # works: #cat "test.h" | sed -e '/^#include \"foo.h\"/r foo.h' -e '/^#include \"foo.h\"/d' > "test.new.h" #cat zorn/header.h | sed -e '/^#include \"header1.h\"/ {' -e 'i // begin insertion' -e 'r alice/header1.h' -e 'a // end insertion' -e 'd' -e '}' echovt "${cursp}header file '${inputfile}' fully processed." echovt "${cursp}returning via '${intermfile}'." echovo_n "." # Return the intermediate filename so the caller knows the name of this # invocation's output file. echo "${intermfile}" } # # -- main function ------------------------------------------------------------- # main() { # The name of the script, stripped of any preceding path. script_name=${0##*/} # The script name to use in informational output. Defaults to ${script_name}. output_name=${script_name} # Whether or not we should strip C-style comments from the output. (Default # is to not strip C-style comments.) strip_comments="" # Whether or not we search the directories in dir_list recursively. (Default # is to not search recursively.) recursive_flag="" # The list of directories to ignore ignore_list="old other temp test testsuite windows" # The amount to nest each level of recursion in the output. nestsp=" " # Process our command line options. while getopts ":o:rchv:" opt; do case $opt in o ) output_name=$OPTARG ;; r ) recursive_flag="1" ;; c ) strip_comments="1" ;; v ) verbose_flag=$OPTARG ;; h ) print_usage ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # Make sure that the verboseness level is valid. if [ "${verbose_flag}" != "0" ] && [ "${verbose_flag}" != "1" ] && [ "${verbose_flag}" != "2" ]; then echo "${output_name}: Invalid verboseness argument '${verbose_flag}'." 1>&2; exit 1 fi # Print usage if we don't have exactly two arguments. if [ $# != "4" ]; then print_usage fi # Acquire the four required arguments: # - the input header file, # - the output header file, # - the temporary directory in which we can write intermediate files, # - the list of directories in which to search for the headers inputfile="$1" outputfile="$2" temp_dir="$3" dir_list="$4" # First, confirm that the directories in dir_list are valid. dir_list2="" for item in ${dir_list}; do # Strip a trailing slash from the path, if it has one. item=${item%/} echovt_n "checking ${item} " if [ -d ${item} ]; then echovt_n2 " ...directory exists." dir_list2="${dir_list2} ${item}" else echovt_n2 " ...invalid directory; omitting." fi done dir_list2=$(canonicalize_ws "${dir_list2}") # Overwrite the original dir_list with the updated copy that omits # invalid directories. dir_list="${dir_list2}" echovt "check summary:" echovt " accessible directories:" echovt " ${dir_list}" # Generate a list of directories (dirpaths) which will be searched whenever # a #include directive is encountered. The method by which dirpaths is # compiled will depend on whether the recursive flag was given. if [ -n "${recursive_flag}" ]; then # If the recursive flag was given, we need to recursively scan each # directory in dir_list for directories with headers via the # function find_header_dirs(). dirpaths="" for item in ${dir_list}; do item_dirpaths=$(find_header_dirs ${item}) dirpaths="${dirpaths} ${item_dirpaths}" done dirpaths=$(canonicalize_ws "${dirpaths}") else # If the recursive flag was not given, we can just use dir_list # as-is, though we opt to filter out the directories that don't # contain .h files. dirpaths="" for item in ${dir_list}; do echovt_n "scanning ${item}" # Acquire a list of the directory's contents. sub_items=$(ls ${item}) # If there is at least one header present, add the current directory to # the list header of directories. result=$(echo ${sub_items} | grep "\.h") if [ -n "${result}" ]; then dirpaths="${dirpaths} ${item}" echovt_n2 " ...found headers." else echovt_n2 " ...no headers found." fi done dirpaths=$(canonicalize_ws "${dirpaths}") fi echovt "scan summary:" echovt " headers found in:" echovt " ${dirpaths}" echovt "preparing to monolithify '${inputfile}'." # Make a copy of the inputfile. #cp ${inputfile} ${outputfile} echovt "new header will be saved to '${outputfile}'." echovo_n "." # Recursively substitute headers for occurrences of #include directives. intermfile=$(replace_pass ${inputfile} "${dirpaths}" "${nestsp}") # Rename the intermediate file(path) to the output file(path). mv ${intermfile} ${outputfile} echovt "substitution complete." echovt "monolithic header saved as '${outputfile}'." echovo_n2 "." # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/gen-libblis-symbols.sh000077500000000000000000000027611474157777200176520ustar00rootroot00000000000000#!/usr/bin/env bash get_config_var() { # Parse the compiler assigned to the CC variable within the config.mk file. echo "$(grep "^ *$1 *:=" config.mk | sed 's/'$1' *:= *//')" } main() { if [ ! -e config.mk ]; then echo "No config.mk file detected; have you configured BLIS?" exit 1 fi CC=$(get_config_var CC) CONFIG_NAME=$(get_config_var CONFIG_NAME) BLIS_H_FLAT="include/${CONFIG_NAME}/blis.h" if [ ! -e ${BLIS_H_FLAT} ]; then echo "No monolithic blis.h file detected at ${BLIS_H_FLAT}; have you run 'make'?" exit 1 fi # # Header line # echo "EXPORTS" # # Breakdown of commands: # $(CC) ... # Pre-process blis.h, making sure to include all BLAS and CBLAS symbols # | tr ... # Make sure to split lines at ';' so that each declaration is on its own line # | grep ... # Find exported symbols # | sed -E # -e ... # 1. Remove all __attribute__ clauses # -e ... # 2. Select only the portion before an opening '(' (if any) # -e ... # 3. Pull out the last word, which is the function name. # | grep ... # Remove constants # | grep ... # Remove blank lines # | sed ... # Remove trailing spaces # | sort # | uniq # ${CC} -DBLIS_ENABLE_CBLAS=1 -DBLIS_ENABLE_BLAS=1 -E ${BLIS_H_FLAT} \ | tr ';' '\n' \ | grep visibility \ | sed -E \ -e 's/__attribute__ *\( *\([^\)]+(\([^\)]+\) *)\) *\)//g' \ -e 's/(.*) *\(.*/\1/' \ -e 's/.* ([^ ].*)/\1/' \ | grep -v BLIS \ | grep -E '[^ ]' \ | sed -e 's/[[:space:]]*$//g' \ | sort \ | uniq } main "$@" blis-1.1/build/gen-make-frags/000077500000000000000000000000001474157777200162145ustar00rootroot00000000000000blis-1.1/build/gen-make-frags/fragment.mk000066400000000000000000000066231474157777200203570ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # fragment.mk # # This is an automatically-generated makefile fragment and will likely get # overwritten or deleted if the user is not careful. Modify at your own risk. # # These two mmakefile variables need to be set in order for the recursive # include process to work! CURRENT_DIR_NAME := _mkfile_fragment_cur_dir_name_ CURRENT_SUB_DIRS := _mkfile_fragment_sub_dir_names_ # Source files local to this fragment LOCAL_SRC_FILES := _mkfile_fragment_local_src_files_ # Add the fragment's local source files to the _global_variable_ variable. _mkfile_fragment_src_var_name_ += $(addprefix $(PARENT_SRC_PATH)/$(CURRENT_DIR_NAME)/, $(LOCAL_SRC_FILES)) # ----------------------------------------------------------------------------- # NOTE: The code below is generic and should remain in all fragment.mk files! # ----------------------------------------------------------------------------- # Add the current fragment to the global list of fragments so the top-level # Makefile knows which directories are participating in the build. FRAGMENT_DIR_PATHS += $(PARENT_SRC_PATH)/$(CURRENT_DIR_NAME) # Recursively descend into other subfragments' local makefiles and include them. ifneq ($(strip $(CURRENT_SUB_DIRS)),) key1 := $(key1).x key2 := $(key2).y stack_$(key1) := $(PARENT_PATH) stack_$(key2) := $(PARENT_SRC_PATH) PARENT_PATH := $(PARENT_PATH)/$(CURRENT_DIR_NAME) PARENT_SRC_PATH := $(PARENT_SRC_PATH)/$(CURRENT_DIR_NAME) FRAGMENT_SUB_DIRS := $(addprefix $(PARENT_PATH)/, $(CURRENT_SUB_DIRS)) -include $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_SUB_DIRS)) PARENT_PATH := $(stack_$(key1)) PARENT_SRC_PATH := $(stack_$(key2)) key1 := $(basename $(key1)) key2 := $(basename $(key2)) endif blis-1.1/build/gen-make-frags/gen-make-frag.sh000077500000000000000000000403301474157777200211540ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # gen-make-frag.sh # # Field G. Van Zee # print_usage() { #local script_name # Get the script name #script_name=${0##*/} # Echo usage info echo " " echo " "$script_name echo " " echo " Field G. Van Zee" echo " " echo " Automatically generates makefile fragments for a specified directory" echo " tree. " echo " " echo " Usage:" echo " ${script_name} [options] root_dir frag_dir templ.mk suff_list ign_list" echo " " echo " Arguments (mandatory):" echo " " echo " root_dir The root directory to scan when generating makefile" echo " fragments." echo " " echo " frag_dir The root directory in which makefile fragments will be" echo " generated." echo " " echo " templ.mk The template makefile fragment used to generate the actual" echo " fragments." echo " " echo " suff_list File containing a newline-separated list of file suffixes" echo " of source files to that the top-level makefile expects to" echo " access." echo " " echo " ign_list File containing a newline-separated list of directory names" echo " to ignore when descending recursively into " echo " " echo " The following options are accepted:" echo " " echo " -d dry-run" echo " Go through all the motions, but don't actually generate any" echo " makefile fragments." echo " -r recursive" echo " Also generate makefile fragments for subdirectories of" echo " root_dir." echo " -h hide" echo " Hide the makefile fragments by prepending filenames with '.'." echo " -p PREFIX prefix name" echo " Use PREFIX instead of uppercased root_dir in the makefile" echo " variable name. If the root_dir were 'stuff' and -p was not" echo " used, then source would be accumulated into a makefile" echo " variable named 'MK_STUFF', but if -p JUNK were given, then" echo " the variable name would instead be MK_JUNK." echo " -o SCRIPT output script name" echo " Use SCRIPT when outputting messages instead of the script's" echo " actual name." echo " -v [0|1|2] verboseness level" echo " level 0: silent (no output)" echo " level 1: default (one line per directory)" echo " level 2: verbose (several lines per directory)." echo " " # Exit with non-zero exit status exit 1 } # # gen_mkfile() # # Creates a single makefile fragment in a user-specified directory and adds # any local source files found to a top-level Makefile variable. # gen_mkfile() { # Local variable declarations local mkfile_frag_var_name local this_dir local this_frag_dir local mkfile_frag_tmpl_name local mkfile_name local mkfile_frag_path local cur_frag_dir local cur_frag_path local local_src_files local sub_items local item_path local item_suffix local cur_frag_sub_dirs # Extract our arguments to local variables mkfile_frag_var_name=$1 this_dir=$2 this_frag_dir=$3 # Strip the leading path from the template makefile path to get its # simple filename. Hide the output makefile fragment filename, if # requested. mkfile_frag_tmpl_name=${mkfile_frag_tmpl_path##*/} if [ -n "$hide_flag" ]; then mkfile_frag_path=$this_frag_dir/.$mkfile_frag_tmpl_name else mkfile_frag_path=$this_frag_dir/$mkfile_frag_tmpl_name fi # Determine the directory in which the fragment will reside. cur_frag_path=$this_dir cur_frag_dir=${this_dir##*/} # Initialize the local source list to empty local_src_files="" # Get a listing of the items in $this_dir sub_items=$(ls $this_dir) # Generate a list of the source files we've chosen for item in $sub_items; do # Prepend the directory to the item to get a relative path item_path=$this_dir/$item # Acquire the item's suffix, if it has one item_suffix=${item_path##*.} # If the suffix matches, then add it to our list if is_in_list $item_suffix "$src_file_suffixes" then local_src_files="$local_src_files $item" fi done # Delete the leading " " space character in the local source files list. local_src_files=${local_src_files##" "} # Initialize the fragment subdirectory list to empty cur_frag_sub_dirs="" # Capture the relative path listing of items in $this_dir. sub_items=$(ls $this_dir) # Determine the fragment's subdirectory names, if any exist for item in $sub_items; do # Prepend the directory to the item to get a relative path item_path=$this_dir/$item # If item is a directory, and it's not in the ignore list, descend into it. #if [ -d $item_path ] && ! should_ignore $item; then if [ -d $item_path ] && ! is_in_list $item "$ignore_dirs" ; then cur_frag_sub_dirs=$cur_frag_sub_dirs" "$item fi done # Delete the leading " " space character in fragment's subdirectory list. cur_frag_sub_dirs=${cur_frag_sub_dirs##" "} # Be verbose, if level 2 was requested. if [ "$verbose_flag" = "2" ]; then echo "mkf frag tmpl path: $mkfile_frag_tmpl_path" echo "mkf frag path: $mkfile_frag_path" echo "cur frag path: $cur_frag_path" echo "cur frag dir: $cur_frag_dir" echo "cur frag sub dirs: $cur_frag_sub_dirs" echo "local src files: $local_src_files" echo "src file suffixes: $src_file_suffixes" echo "mkf frag var name: $mkfile_frag_var_name" echo "--------------------------------------------------" fi # Copy the template makefile to the directory given, using the new # makefile name we just created above. if [ -z "$dry_run_flag" ]; then cat $mkfile_frag_tmpl_path | sed -e s/"$mkfile_fragment_cur_dir_name_anchor"/"$cur_frag_dir"/g \ | sed -e s/"$mkfile_fragment_sub_dir_names_anchor"/"$cur_frag_sub_dirs"/g \ | sed -e s/"$mkfile_fragment_local_src_files_anchor"/"$local_src_files"/g \ | sed -e s/"$mkfile_fragment_src_var_name_anchor"/"$mkfile_frag_var_name"/g \ > $mkfile_frag_path fi # Return peacefully. return 0 } # # gen_mkfiles # # Recursively generates makefile fragments for a directory and all # subdirectories. All of the actual work happens in gen_mkfile(). # gen_mkfiles() { # Local variable declarations local item sub_items cur_dir this_frag_dir this_dir # Extract our argument cur_dir=$1 this_frag_dir=$2 # Append a relevant suffix to the makefile variable name, if necesary # NOTE: This step is disabled because special directories are presently # ignored when generating makefile variable names. #all_add_src_var_name "$cur_dir" # Be verbose if level 2 was requested if [ "$verbose_flag" = "2" ]; then echo ">>>" $script_name ${src_var_name}_$SRC $cur_dir $this_frag_dir elif [ "$verbose_flag" = "1" ]; then echo "$script_name: creating makefile fragment in $this_frag_dir from $cur_dir" fi # Call our function to generate a makefile in the directory given. gen_mkfile "${src_var_name}_$SRC" $cur_dir $this_frag_dir # Get a listing of the directories in $directory sub_items=$(ls $cur_dir) # Descend into the contents of root_dir to generate the subdirectories' # makefile fragments. for item in $sub_items; do # If item is a directory, and it's not in the ignore list, descend into it. #if [ -d "$cur_dir/$item" ] && ! should_ignore $item; then if [ -d "$cur_dir/$item" ] && ! is_in_list $item "$ignore_dirs" ; then gen_mkfiles $cur_dir/$item $this_frag_dir/$item fi done # Remove a relevant suffix from the makefile variable name, if necesary # NOTE: This step is disabled because special directories are presently # ignored when generating makefile variable names. #all_del_src_var_name "$cur_dir" # Return peacefully return 0 } #update_src_var_name_special() #{ # local dir act i name var_suffix # # # Extract arguments. # act="$1" # dir="$2" # # # Strip / from end of directory path, if there is one, and then strip # # path from directory name. # dir=${dir%/} # dir=${dir##*/} # # # Run through our list. # # NOTE: CURRENTLY, SPECIAL DIRECTORY NAMES ARE IGNORED. In order to # # re-enable them, remove the quotes from "${special_dirs}". # for specdir in "${special_dirs}"; do # # # If the current item matches sdir, then we'll have # # to make a modification of some form. # if [ "$dir" = "$specdir" ]; then # # # Convert the directory name to uppercase. # var_suffix=$(echo "$dir" | tr '[:lower:]' '[:upper:]') # # # Either add or remove the suffix, and also update the # # source file suffix variable. # if [ "$act" == "+" ]; then # src_var_name=${src_var_name}_$var_suffix # else # src_var_name=${src_var_name%_$var_suffix} # fi # # # No need to continue iterating. # break; # fi # done #} #init_src_var_name() #{ # local dir="$1" # # # Strip off the leading / if there is one # dir=${dir%%/} # # # Convert the / directory separators into spaces to make a list of # # directories. # list=${dir//\// } # # # Inspect each item in $list # for item in $list; do # # # Try to initialize the source variable name # all_add_src_var_name $item # done #} #all_add_src_var_name() #{ # local dir="$1" # # update_src_var_name_special "+" "$dir" # #} #all_del_src_var_name() #{ # local dir="$1" # # update_src_var_name_special "-" "$dir" #} read_mkfile_config() { # Read the file describing file suffixes. src_file_suffixes=$(cat "${suffix_file}") # Read the file listing the directories to ignore. ignore_dirs=$(cat "${ignore_file}") # Change newlines into spaces. This is optional, but helps when # printing these values out (so they appear on one line). src_file_suffixes=$(echo ${src_file_suffixes} | sed "s/\n/ /g") ignore_dirs=$(echo ${ignore_dirs} | sed "s/\n/ /g") } main() { # -- BEGIN GLOBAL VARIABLE DECLARATIONS -- # Define these makefile template "anchors" used in gen_mkfile(). mkfile_fragment_cur_dir_name_anchor="_mkfile_fragment_cur_dir_name_" mkfile_fragment_sub_dir_names_anchor="_mkfile_fragment_sub_dir_names_" mkfile_fragment_local_src_files_anchor="_mkfile_fragment_local_src_files_" mkfile_fragment_src_var_name_anchor="_mkfile_fragment_src_var_name_" # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The prefix for all makefile variables. src_var_name_prefix='MK' # The variable that always holds the string that will be passed to # gen_mkfile() as the source variable to insert into the fragment.mk. src_var_name='' # The suffix appended to all makefile fragment source variables. SRC='SRC' # The list of source file suffixes to add to the makefile variables. src_file_suffixes='' # The lists of directories to ignore. ignore_dirs='' # The arguments to this function. They'll get assigned meaningful # values after getopts. root_dir="" frag_dir="" mkfile_frag_tmpl_path="" suffix_file="" ignore_file="" # Flags set by getopts. dry_run_flag="" hide_flag="" recursive_flag="" output_name="" prefix_flag="" verbose_flag="" # -- END GLOBAL VARIABLE DECLARATIONS -- # Local variable declarations. local item sub_items this_dir # Process our command line options. while getopts ":dho:p:rv:" opt; do case $opt in d ) dry_run_flag="1" ;; h ) hide_flag="1" ;; r ) recursive_flag="1" ;; o ) output_name=$OPTARG ;; p ) prefix_flag=$OPTARG ;; v ) verbose_flag=$OPTARG ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # Make sure that verboseness level is valid. if [ "$verbose_flag" != "0" ] && [ "$verbose_flag" != "1" ] && [ "$verbose_flag" != "2" ]; then verbose_flag="1" fi # Check the number of arguments after command line option processing. if [ $# != "5" ]; then print_usage fi # If an output script name was given, overwrite script_name with it. if [ -n "${output_name}" ]; then script_name="${output_name}" fi # Extract our arguments. root_dir=$1 frag_dir=$2 mkfile_frag_tmpl_path=$3 suffix_file=$4 ignore_file=$5 # Read the makefile config files to be used in the makefile fragment # generation. read_mkfile_config # Strip / from end of directory path, if there is one. root_dir=${root_dir%/} frag_dir=${frag_dir%/} # Initialize the name of the makefile source variable. if [ -n "$prefix_flag" ]; then # If prefix_flag is not null, then we construct src_var_name using # it instead of root_dir. So if the prefix is 'junk', we will get # makefile variables that begin with 'MK_JUNK'. root_dir_upper=$(echo "$prefix_flag" | tr '[:lower:]' '[:upper:]') src_var_name="${src_var_name_prefix}_${root_dir_upper}" else # Otherwise, we use root_dir. If the root directory is 'foo' then # makefile variables will begin with 'MK_FOO'. # We are also careful to convert forward slashes into underscore so # root directories such as foo/bar result in makefile variables # that begin with 'MK_FOO_BAR'. root_dir_upper=$(echo "$root_dir" | tr '[:lower:]' '[:upper:]') root_dir_upper=$(echo "$root_dir_upper" | tr '/' '_') src_var_name="${src_var_name_prefix}_${root_dir_upper}" fi # Be verbose if level 2 was requested. if [ "$verbose_flag" = "2" ]; then echo ">>>" $script_name ${src_var_name}_$SRC $root_dir $frag_dir elif [ "$verbose_flag" = "1" ]; then echo "$script_name: creating makefile fragment in $frag_dir from $root_dir" fi # Call our function to generate a makefile in the root directory given. gen_mkfile "${src_var_name}_$SRC" $root_dir $frag_dir # If we were asked to act recursively, then continue processing # root_dir's contents. if [ -n "$recursive_flag" ]; then # Get a listing of the directories in $directory. sub_items=$(ls $root_dir) # Descend into the contents of root_dir to generate the makefile # fragments. for item in $sub_items; do # If item is a directory, and it's not in the ignore list, descend into it. #if [ -d "$root_dir/$item" ] && ! should_ignore $item ; then if [ -d "$root_dir/$item" ] && ! is_in_list $item "$ignore_dirs" ; then gen_mkfiles $root_dir/$item $frag_dir/$item fi done fi # Exit peacefully. return 0 } is_in_list() { local cur_item the_item item_list # Extract argument. the_item="$1" item_list="$2" # Check each item in the list against the item of interest. for cur_item in ${item_list}; do # If the current item in the list matches the one of interest. if [ "${cur_item}" = "${the_item}" ]; then # Return success (ie: item was found). return 0 fi done # If we made it this far, return failure (ie: item not found). return 1 } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/gen-make-frags/ignore_list000066400000000000000000000000621474157777200204530ustar00rootroot00000000000000attic broken old other temp tmp test p10_testsuiteblis-1.1/build/gen-make-frags/special_list000066400000000000000000000000161474157777200206070ustar00rootroot00000000000000noopt kernels blis-1.1/build/gen-make-frags/suffix_list000066400000000000000000000000211474157777200204670ustar00rootroot00000000000000c cc cpp cxx s S blis-1.1/build/irun.py000077500000000000000000000241441474157777200147670ustar00rootroot00000000000000#!/usr/bin/env python3 # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Import modules import os import sys import getopt import re import subprocess import time import statistics def print_usage(): my_print( " " ) my_print( " %s" % script_name ) my_print( " " ) my_print( " Field G. Van Zee" ) my_print( " " ) my_print( " Repeatedly run a test driver and accumulate statistics for the" ) my_print( " output." ) my_print( " " ) my_print( " Usage:" ) my_print( " " ) my_print( " %s [options] drivername" % script_name ) my_print( " " ) my_print( " Arguments:" ) my_print( " " ) my_print( " drivername The filename/path of the test driver to run. The" ) my_print( " test driver must output its performance data to" ) my_print( " standard output." ) my_print( " " ) my_print( " The following options are accepted:" ) my_print( " " ) my_print( " -c num performance column index" ) my_print( " Find the performance result in column index of" ) my_print( " the test driver's output. Here, a column is defined" ) my_print( " as a contiguous sequence of non-whitespace characters," ) my_print( " with the column indices beginning at 0. By default," ) my_print( " the second-to-last column index in the output is used." ) my_print( " " ) my_print( " -d delay sleep() delay" ) my_print( " Wait seconds after each execution of the" ) my_print( " test driver. The default delay is 0." ) my_print( " " ) my_print( " -n niter number of iterations" ) my_print( " Execute the test driver times. The default" ) my_print( " value is 10." ) my_print( " " ) my_print( " -q quiet; summary only" ) my_print( " Do not output statistics after every new execution of" ) my_print( " the test driver; instead, only output the final values" ) my_print( " after all iterations are complete. The default is to" ) my_print( " output updated statistics after each iteration." ) my_print( " " ) my_print( " -h help" ) my_print( " Output this information and exit." ) my_print( " " ) # ------------------------------------------------------------------------------ def my_print( s ): sys.stdout.write( "%s\n" % s ) #sys.stdout.flush() # ------------------------------------------------------------------------------ # Global variables. script_name = None output_name = None def main(): global script_name global output_name # Obtain the script name. path, script_name = os.path.split(sys.argv[0]) output_name = script_name # Default values for optional arguments. #perf_col = 9 perf_col = -1 delay = 0 niter = 10 quiet = False # Process our command line options. try: opts, args = getopt.getopt( sys.argv[1:], "c:d:n:hq" ) except getopt.GetoptError as err: # print help information and exit: my_print( str(err) ) # will print something like "option -a not recognized" print_usage() sys.exit(2) for opt, optarg in opts: if opt == "-c": perf_col = optarg elif opt == "-d": delay = optarg elif opt == "-n": niter = optarg elif opt == "-q": quiet = True elif opt == "-h": print_usage() sys.exit() else: print_usage() sys.exit() # Print usage if we don't have exactly one argument. if len( args ) != 1: print_usage() sys.exit() # Acquire our only mandatory argument: the name of the test driver. driverfile = args[0] #my_print( "test driver: %s" % driverfile ) #my_print( "column num: %s" % perf_col ) #my_print( "delay: %s" % delay ) #my_print( "num iter: %s" % niter ) # Build a list of iterations. iters = range( int(niter) ) # Run the test driver once to detect the number of lines of output. p = subprocess.run( driverfile, stdout=subprocess.PIPE ) lines0 = p.stdout.decode().splitlines() num_lines0 = int(len(lines0)) # Initialize the list of lists (one list per performance result). aperf = [] for i in range( num_lines0 ): aperf.append( [] ) for it in iters: # Run the test driver. p = subprocess.run( driverfile, stdout=subprocess.PIPE ) # Acquire the lines of output. lines = p.stdout.decode().splitlines() # Accumulate the test driver's latest results into aperf. for i in range( num_lines0 ): # Parse the current line to find the performance value. line = lines[i] words = line.split() if perf_col == -1: perf = words[ len(words)-2 ] else: perf = words[ int(perf_col) ] # As unlikely as it is, guard against Inf and NaN. if float(perf) == float('Inf') or \ float(perf) == -float('Inf') or \ float(perf) == float('NaN'): perf = 0.0 # Add the performance value to the list at the ith entry of aperf. aperf[i].append( float(perf) ) # Compute stats for the current line. avgp = statistics.mean( aperf[i] ) maxp = max( aperf[i] ) minp = min( aperf[i] ) # Only compute stdev() when we have two or more data points. if len( aperf[i] ) > 1: stdp = statistics.stdev( aperf[i] ) else: stdp = 0.0 # Construct a string to match the performance value and then # use that string to search-and-replace with four format specs # for the min, avg, max, and stdev values computed above. search = '%8s' % perf newline = re.sub( str(search), ' %7.2f %7.2f %7.2f %6.2f', line ) # Search for the column index range that would be present if this were # matlab-compatible output. The index range will typically be 1:n, # where n is the number of columns of data. found_index = False for word in words: if re.match( '1:', word ): index_str = word found_index = True break # If we find the column index range, we need to update it to reflect # the replacement of one column of data with four, for a net increase # of columns. We do so via another instance of re.sub() in which we # search for the old index string and replace it with the new one. if found_index: last_col = int(index_str[2]) + 3 new_index_str = '1:%1s' % last_col newline = re.sub( index_str, new_index_str, newline ) # If the quiet flag was not give, output the intermediate results. if not quiet: print( newline % ( float(minp), float(avgp), float(maxp), float(stdp) ) ) # Flush stdout after each set of output prior to sleeping. sys.stdout.flush() # Sleep for a bit until the next iteration. time.sleep( int(delay) ) # If the quiet flag was given, output the final results. if quiet: for i in range( num_lines0 ): # Parse the current line to find the performance value (only # needed for call to re.sub() below). line = lines0[i] words = line.split() if perf_col == -1: perf = words[ len(words)-2 ] else: perf = words[ int(perf_col) ] # Compute stats for the current line. avgp = statistics.mean( aperf[i] ) maxp = max( aperf[i] ) minp = min( aperf[i] ) # Only compute stdev() when we have two or more data points. if len( aperf[i] ) > 1: stdp = statistics.stdev( aperf[i] ) else: stdp = 0.0 # Construct a string to match the performance value and then # use that string to search-and-replace with four format specs # for the min, avg, max, and stdev values computed above. search = '%8s' % perf newline = re.sub( str(search), ' %7.2f %7.2f %7.2f %6.2f', line ) # Search for the column index range that would be present if this were # matlab-compatible output. The index range will typically be 1:n, # where n is the number of columns of data. found_index = False for word in words: if re.match( '1:', word ): index_str = word found_index = True break # If we find the column index range, we need to update it to reflect # the replacement of one column of data with four, for a net increase # of columns. We do so via another instance of re.sub() in which we # search for the old index string and replace it with the new one. if found_index: last_col = int(index_str[2]) + 3 new_index_str = '1:%1s' % last_col newline = re.sub( index_str, new_index_str, newline ) # Output the results for the current line. print( newline % ( float(minp), float(avgp), float(maxp), float(stdp) ) ) # Flush stdout afterwards. sys.stdout.flush() # Return from main(). return 0 if __name__ == "__main__": main() blis-1.1/build/libblis-symbols.def000066400000000000000000000514451474157777200172270ustar00rootroot00000000000000EXPORTS bli_abort bli_absqsc bli_acquire_mij bli_acquire_mpart bli_acquire_mpart_b2t bli_acquire_mpart_br2tl bli_acquire_mpart_l2r bli_acquire_mpart_mdim bli_acquire_mpart_mndim bli_acquire_mpart_ndim bli_acquire_mpart_r2l bli_acquire_mpart_t2b bli_acquire_mpart_tl2br bli_acquire_vi bli_acquire_vpart_b2f bli_acquire_vpart_f2b bli_addd bli_addd_ex bli_addm bli_addm_ex bli_addsc bli_addv bli_addv_ex bli_align_dim_to_mult bli_align_dim_to_size bli_align_ptr_to_size bli_amaxv bli_amaxv_ex bli_arch_query_id bli_arch_string bli_asumv bli_asumv_ex bli_axpbyv bli_axpbyv_ex bli_axpy2v bli_axpy2v_ex bli_axpyd bli_axpyd_ex bli_axpyf bli_axpyf_ex bli_axpym bli_axpym_ex bli_axpyv bli_axpyv_ex bli_blksz_create bli_blksz_create_ed bli_blksz_free bli_blksz_init bli_blksz_init_easy bli_blksz_init_ed bli_cabsqsc bli_caddd bli_caddd_ex bli_caddm bli_caddm_ex bli_caddsc bli_caddv bli_caddv_ex bli_camaxv bli_camaxv_ex bli_castm bli_castnzm bli_castv bli_casumv bli_casumv_ex bli_caxpbyv bli_caxpbyv_ex bli_caxpy2v bli_caxpy2v_ex bli_caxpyd bli_caxpyd_ex bli_caxpyf bli_caxpyf_ex bli_caxpym bli_caxpym_ex bli_caxpyv bli_caxpyv_ex bli_cccastm bli_cccastnzm bli_cccastv bli_cccopysc bli_ccopyd bli_ccopyd_ex bli_ccopym bli_ccopym_ex bli_ccopyv bli_ccopyv_ex bli_ccxpbym_md bli_ccxpbym_md_ex bli_cdcastm bli_cdcastnzm bli_cdcastv bli_cdcopysc bli_cdivsc bli_cdotaxpyv bli_cdotaxpyv_ex bli_cdotv bli_cdotv_ex bli_cdotxaxpyf bli_cdotxaxpyf_ex bli_cdotxf bli_cdotxf_ex bli_cdotxv bli_cdotxv_ex bli_cdxpbym_md bli_cdxpbym_md_ex bli_ceqm bli_ceqsc bli_ceqv bli_cfprintm bli_cfprintv bli_cgemm bli_cgemm_ex bli_cgemmt bli_cgemmt_ex bli_cgemv bli_cgemv_ex bli_cger bli_cger_ex bli_cgetijm bli_cgetijv bli_cgetsc bli_cgtesc bli_cgtsc bli_check_error_code_helper bli_chemm bli_chemm_ex bli_chemv bli_chemv_ex bli_cher bli_cher2 bli_cher2_ex bli_cher2k bli_cher2k_ex bli_cher_ex bli_cherk bli_cherk_ex bli_cinvertd bli_cinvertd_ex bli_cinvertsc bli_cinvertv bli_cinvertv_ex bli_cinvscald bli_cinvscald_ex bli_cinvscalm bli_cinvscalm_ex bli_cinvscalv bli_cinvscalv_ex bli_clock bli_clock_min_diff bli_cltesc bli_cltsc bli_cmachval bli_cmkherm bli_cmkherm_ex bli_cmksymm bli_cmksymm_ex bli_cmktrim bli_cmktrim_ex bli_cmulsc bli_cnorm1m bli_cnorm1m_ex bli_cnorm1v bli_cnorm1v_ex bli_cnormfm bli_cnormfm_ex bli_cnormfsc bli_cnormfv bli_cnormfv_ex bli_cnormim bli_cnormim_ex bli_cnormiv bli_cnormiv_ex bli_cntl_clear_node bli_cntl_copy bli_cntl_create_node bli_cntl_free bli_cntl_free_node bli_cntl_mark_family bli_cntx_clear bli_cntx_print bli_cntx_set_blkszs bli_cntx_set_ind_blkszs bli_cntx_set_l3_sup_handlers bli_cntx_set_ukr_prefs bli_cntx_set_ukrs bli_copyd bli_copyd_ex bli_copym bli_copym_ex bli_copysc bli_copyv bli_copyv_ex bli_cprintm bli_cprintv bli_crandm bli_crandm_ex bli_crandnm bli_crandnm_ex bli_crandnv bli_crandnv_ex bli_crandv bli_crandv_ex bli_cscal2d bli_cscal2d_ex bli_cscal2m bli_cscal2m_ex bli_cscal2v bli_cscal2v_ex bli_cscald bli_cscald_ex bli_cscalm bli_cscalm_ex bli_cscalv bli_cscalv_ex bli_cscastm bli_cscastnzm bli_cscastv bli_cscopysc bli_csetd bli_csetd_ex bli_csetid bli_csetid_ex bli_csetijm bli_csetijv bli_csetm bli_csetm_ex bli_csetsc bli_csetv bli_csetv_ex bli_cshiftd bli_cshiftd_ex bli_csqrtrsc bli_csqrtsc bli_csubd bli_csubd_ex bli_csubm bli_csubm_ex bli_csubsc bli_csubv bli_csubv_ex bli_csumsqv bli_csumsqv_ex bli_cswapv bli_cswapv_ex bli_csxpbym_md bli_csxpbym_md_ex bli_csymm bli_csymm_ex bli_csymv bli_csymv_ex bli_csyr bli_csyr2 bli_csyr2_ex bli_csyr2k bli_csyr2k_ex bli_csyr_ex bli_csyrk bli_csyrk_ex bli_ctrmm bli_ctrmm3 bli_ctrmm3_ex bli_ctrmm_ex bli_ctrmv bli_ctrmv_ex bli_ctrsm bli_ctrsm_ex bli_ctrsv bli_ctrsv_ex bli_cunzipsc bli_cxpbyd bli_cxpbyd_ex bli_cxpbym bli_cxpbym_ex bli_cxpbyv bli_cxpbyv_ex bli_czcastm bli_czcastnzm bli_czcastv bli_czcopysc bli_czipsc bli_czxpbym_md bli_czxpbym_md_ex bli_dabsqsc bli_daddd bli_daddd_ex bli_daddm bli_daddm_ex bli_daddsc bli_daddv bli_daddv_ex bli_damaxv bli_damaxv_ex bli_dasumv bli_dasumv_ex bli_daxpbyv bli_daxpbyv_ex bli_daxpy2v bli_daxpy2v_ex bli_daxpyd bli_daxpyd_ex bli_daxpyf bli_daxpyf_ex bli_daxpym bli_daxpym_ex bli_daxpyv bli_daxpyv_ex bli_dccastm bli_dccastnzm bli_dccastv bli_dccopysc bli_dcopyd bli_dcopyd_ex bli_dcopym bli_dcopym_ex bli_dcopyv bli_dcopyv_ex bli_dcxpbym_md bli_dcxpbym_md_ex bli_ddcastm bli_ddcastnzm bli_ddcastv bli_ddcopysc bli_ddivsc bli_ddotaxpyv bli_ddotaxpyv_ex bli_ddotv bli_ddotv_ex bli_ddotxaxpyf bli_ddotxaxpyf_ex bli_ddotxf bli_ddotxf_ex bli_ddotxv bli_ddotxv_ex bli_ddxpbym_md bli_ddxpbym_md_ex bli_deqm bli_deqsc bli_deqv bli_dfprintm bli_dfprintv bli_dgemm bli_dgemm_ex bli_dgemmt bli_dgemmt_ex bli_dgemv bli_dgemv_ex bli_dger bli_dger_ex bli_dgetijm bli_dgetijv bli_dgetsc bli_dgtesc bli_dgtsc bli_dhemm bli_dhemm_ex bli_dhemv bli_dhemv_ex bli_dher bli_dher2 bli_dher2_ex bli_dher2k bli_dher2k_ex bli_dher_ex bli_dherk bli_dherk_ex bli_dinvertd bli_dinvertd_ex bli_dinvertsc bli_dinvertv bli_dinvertv_ex bli_dinvscald bli_dinvscald_ex bli_dinvscalm bli_dinvscalm_ex bli_dinvscalv bli_dinvscalv_ex bli_divsc bli_dltesc bli_dltsc bli_dmachval bli_dmkherm bli_dmkherm_ex bli_dmksymm bli_dmksymm_ex bli_dmktrim bli_dmktrim_ex bli_dmulsc bli_dnorm1m bli_dnorm1m_ex bli_dnorm1v bli_dnorm1v_ex bli_dnormfm bli_dnormfm_ex bli_dnormfsc bli_dnormfv bli_dnormfv_ex bli_dnormim bli_dnormim_ex bli_dnormiv bli_dnormiv_ex bli_dotaxpyv bli_dotaxpyv_ex bli_dotv bli_dotv_ex bli_dotxaxpyf bli_dotxaxpyf_ex bli_dotxf bli_dotxf_ex bli_dotxv bli_dotxv_ex bli_dprintm bli_dprintv bli_drandm bli_drandm_ex bli_drandnm bli_drandnm_ex bli_drandnv bli_drandnv_ex bli_drandv bli_drandv_ex bli_dscal2d bli_dscal2d_ex bli_dscal2m bli_dscal2m_ex bli_dscal2v bli_dscal2v_ex bli_dscald bli_dscald_ex bli_dscalm bli_dscalm_ex bli_dscalv bli_dscalv_ex bli_dscastm bli_dscastnzm bli_dscastv bli_dscopysc bli_dsetd bli_dsetd_ex bli_dsetid bli_dsetid_ex bli_dsetijm bli_dsetijv bli_dsetm bli_dsetm_ex bli_dsetsc bli_dsetv bli_dsetv_ex bli_dshiftd bli_dshiftd_ex bli_dsqrtrsc bli_dsqrtsc bli_dsubd bli_dsubd_ex bli_dsubm bli_dsubm_ex bli_dsubsc bli_dsubv bli_dsubv_ex bli_dsumsqv bli_dsumsqv_ex bli_dswapv bli_dswapv_ex bli_dsxpbym_md bli_dsxpbym_md_ex bli_dsymm bli_dsymm_ex bli_dsymv bli_dsymv_ex bli_dsyr bli_dsyr2 bli_dsyr2_ex bli_dsyr2k bli_dsyr2k_ex bli_dsyr_ex bli_dsyrk bli_dsyrk_ex bli_dt_size bli_dt_string bli_dtrmm bli_dtrmm3 bli_dtrmm3_ex bli_dtrmm_ex bli_dtrmv bli_dtrmv_ex bli_dtrsm bli_dtrsm_ex bli_dtrsv bli_dtrsv_ex bli_dunzipsc bli_dxpbyd bli_dxpbyd_ex bli_dxpbym bli_dxpbym_ex bli_dxpbyv bli_dxpbyv_ex bli_dzcastm bli_dzcastnzm bli_dzcastv bli_dzcopysc bli_dzipsc bli_dzxpbym_md bli_dzxpbym_md_ex bli_eqm bli_eqsc bli_eqv bli_error_checking_level bli_error_checking_level_set bli_finalize bli_fprintm bli_fprintv bli_free_user bli_gemm bli_gemm_ex bli_gemm_ukernel bli_gemmt bli_gemmt_ex bli_gemmtrsm_ukernel bli_gemv bli_gemv_ex bli_ger bli_ger_ex bli_getijm bli_getijv bli_getopt bli_getopt_init_state bli_getsc bli_gks_init_ref_cntx bli_gks_l3_ukr_impl_string bli_gks_l3_ukr_impl_type bli_gks_query_cntx bli_gks_query_ind_cntx bli_gks_query_nat_cntx bli_gtesc bli_gtsc bli_hemm bli_hemm_ex bli_hemv bli_hemv_ex bli_her bli_her2 bli_her2_ex bli_her2k bli_her2k_ex bli_her_ex bli_herk bli_herk_ex bli_ifprintm bli_ifprintv bli_igetsc bli_ind_disable bli_ind_disable_all bli_ind_disable_all_dt bli_ind_disable_dt bli_ind_enable bli_ind_enable_dt bli_ind_oper_enable_only bli_ind_oper_find_avail bli_ind_oper_get_avail_impl_string bli_info_get_blas_int_type_size bli_info_get_enable_blas bli_info_get_enable_cblas bli_info_get_enable_hpx bli_info_get_enable_hpx_as_default bli_info_get_enable_memkind bli_info_get_enable_openmp bli_info_get_enable_openmp_as_default bli_info_get_enable_pba_pools bli_info_get_enable_pthreads bli_info_get_enable_pthreads_as_default bli_info_get_enable_sandbox bli_info_get_enable_sba_pools bli_info_get_enable_stay_auto_init bli_info_get_enable_threading bli_info_get_enable_tls bli_info_get_gemm_impl_string bli_info_get_gemm_ukr_impl_string bli_info_get_gemmt_impl_string bli_info_get_gemmtrsm_l_ukr_impl_string bli_info_get_gemmtrsm_u_ukr_impl_string bli_info_get_heap_addr_align_size bli_info_get_heap_stride_align_size bli_info_get_hemm_impl_string bli_info_get_her2k_impl_string bli_info_get_herk_impl_string bli_info_get_int_type_size bli_info_get_int_type_size_str bli_info_get_max_type_size bli_info_get_num_fp_types bli_info_get_page_size bli_info_get_pool_addr_align_size_a bli_info_get_pool_addr_align_size_b bli_info_get_pool_addr_align_size_c bli_info_get_pool_addr_align_size_gen bli_info_get_pool_addr_offset_size_a bli_info_get_pool_addr_offset_size_b bli_info_get_pool_addr_offset_size_c bli_info_get_pool_addr_offset_size_gen bli_info_get_simd_align_size bli_info_get_simd_num_registers bli_info_get_simd_size bli_info_get_stack_buf_align_size bli_info_get_stack_buf_max_size bli_info_get_symm_impl_string bli_info_get_syr2k_impl_string bli_info_get_syrk_impl_string bli_info_get_thread_jrir_rr bli_info_get_thread_jrir_slab bli_info_get_thread_jrir_tlb bli_info_get_trmm3_impl_string bli_info_get_trmm_impl_string bli_info_get_trsm_impl_string bli_info_get_trsm_l_ukr_impl_string bli_info_get_trsm_u_ukr_impl_string bli_info_get_version_str bli_init bli_invertd bli_invertd_ex bli_invertsc bli_invertv bli_invertv_ex bli_invscald bli_invscald_ex bli_invscalm bli_invscalm_ex bli_invscalv bli_invscalv_ex bli_iprintm bli_iprintv bli_isetsc bli_l3_cntl_free bli_l3_thrinfo_create bli_ltesc bli_ltsc bli_machval bli_malloc_user bli_mkherm bli_mkherm_ex bli_mksymm bli_mksymm_ex bli_mktrim bli_mktrim_ex bli_mulsc bli_norm1m bli_norm1m_ex bli_norm1v bli_norm1v_ex bli_normfm bli_normfm_ex bli_normfsc bli_normfv bli_normfv_ex bli_normim bli_normim_ex bli_normiv bli_normiv_ex bli_obj_alloc_buffer bli_obj_attach_buffer bli_obj_create bli_obj_create_1x1 bli_obj_create_1x1_with_attached_buffer bli_obj_create_conf_to bli_obj_create_with_attached_buffer bli_obj_create_without_buffer bli_obj_free bli_obj_print bli_obj_scalar_apply_scalar bli_obj_scalar_attach bli_obj_scalar_cast_to bli_obj_scalar_detach bli_obj_scalar_init_detached bli_obj_scalar_init_detached_copy_of bli_obj_scalar_reset bli_pack_get_pack_a bli_pack_get_pack_b bli_pack_set_pack_a bli_pack_set_pack_b bli_packm_alloc bli_packm_alloc_ex bli_packm_blk_var1 bli_packm_scalar bli_param_map_blis_to_char_conj bli_param_map_blis_to_char_diag bli_param_map_blis_to_char_dt bli_param_map_blis_to_char_side bli_param_map_blis_to_char_trans bli_param_map_blis_to_char_uplo bli_param_map_blis_to_netlib_diag bli_param_map_blis_to_netlib_machval bli_param_map_blis_to_netlib_side bli_param_map_blis_to_netlib_trans bli_param_map_blis_to_netlib_uplo bli_param_map_char_to_blis_conj bli_param_map_char_to_blis_diag bli_param_map_char_to_blis_dt bli_param_map_char_to_blis_side bli_param_map_char_to_blis_trans bli_param_map_char_to_blis_uplo bli_pba_query bli_printm bli_printv bli_projm bli_projv bli_pthread_barrier_destroy bli_pthread_barrier_init bli_pthread_barrier_wait bli_pthread_cond_broadcast bli_pthread_cond_destroy bli_pthread_cond_init bli_pthread_cond_wait bli_pthread_create bli_pthread_join bli_pthread_mutex_destroy bli_pthread_mutex_init bli_pthread_mutex_lock bli_pthread_mutex_trylock bli_pthread_mutex_unlock bli_pthread_once bli_randm bli_randm_ex bli_randnm bli_randnm_ex bli_randnv bli_randnv_ex bli_randv bli_randv_ex bli_rntm_init_from_global bli_rntm_set_num_threads bli_rntm_set_ways bli_rntm_set_ways_for_op bli_sabsqsc bli_saddd bli_saddd_ex bli_saddm bli_saddm_ex bli_saddsc bli_saddv bli_saddv_ex bli_samaxv bli_samaxv_ex bli_sasumv bli_sasumv_ex bli_saxpbyv bli_saxpbyv_ex bli_saxpy2v bli_saxpy2v_ex bli_saxpyd bli_saxpyd_ex bli_saxpyf bli_saxpyf_ex bli_saxpym bli_saxpym_ex bli_saxpyv bli_saxpyv_ex bli_scal2d bli_scal2d_ex bli_scal2m bli_scal2m_ex bli_scal2v bli_scal2v_ex bli_scald bli_scald_ex bli_scalm bli_scalm_ex bli_scalv bli_scalv_ex bli_sccastm bli_sccastnzm bli_sccastv bli_sccopysc bli_scopyd bli_scopyd_ex bli_scopym bli_scopym_ex bli_scopyv bli_scopyv_ex bli_scxpbym_md bli_scxpbym_md_ex bli_sdcastm bli_sdcastnzm bli_sdcastv bli_sdcopysc bli_sdivsc bli_sdotaxpyv bli_sdotaxpyv_ex bli_sdotv bli_sdotv_ex bli_sdotxaxpyf bli_sdotxaxpyf_ex bli_sdotxf bli_sdotxf_ex bli_sdotxv bli_sdotxv_ex bli_sdxpbym_md bli_sdxpbym_md_ex bli_seqm bli_seqsc bli_seqv bli_setd bli_setd_ex bli_setid bli_setid_ex bli_setijm bli_setijv bli_setim bli_setiv bli_setm bli_setm_ex bli_setrm bli_setrv bli_setsc bli_setv bli_setv_ex bli_sfprintm bli_sfprintv bli_sgemm bli_sgemm_ex bli_sgemmt bli_sgemmt_ex bli_sgemv bli_sgemv_ex bli_sger bli_sger_ex bli_sgetijm bli_sgetijv bli_sgetsc bli_sgtesc bli_sgtsc bli_shemm bli_shemm_ex bli_shemv bli_shemv_ex bli_sher bli_sher2 bli_sher2_ex bli_sher2k bli_sher2k_ex bli_sher_ex bli_sherk bli_sherk_ex bli_shiftd bli_shiftd_ex bli_sinvertd bli_sinvertd_ex bli_sinvertsc bli_sinvertv bli_sinvertv_ex bli_sinvscald bli_sinvscald_ex bli_sinvscalm bli_sinvscalm_ex bli_sinvscalv bli_sinvscalv_ex bli_sleep bli_sltesc bli_sltsc bli_smachval bli_smkherm bli_smkherm_ex bli_smksymm bli_smksymm_ex bli_smktrim bli_smktrim_ex bli_smulsc bli_snorm1m bli_snorm1m_ex bli_snorm1v bli_snorm1v_ex bli_snormfm bli_snormfm_ex bli_snormfsc bli_snormfv bli_snormfv_ex bli_snormim bli_snormim_ex bli_snormiv bli_snormiv_ex bli_sprintm bli_sprintv bli_sqrtrsc bli_sqrtsc bli_srandm bli_srandm_ex bli_srandnm bli_srandnm_ex bli_srandnv bli_srandnv_ex bli_srandv bli_srandv_ex bli_sscal2d bli_sscal2d_ex bli_sscal2m bli_sscal2m_ex bli_sscal2v bli_sscal2v_ex bli_sscald bli_sscald_ex bli_sscalm bli_sscalm_ex bli_sscalv bli_sscalv_ex bli_sscastm bli_sscastnzm bli_sscastv bli_sscopysc bli_ssetd bli_ssetd_ex bli_ssetid bli_ssetid_ex bli_ssetijm bli_ssetijv bli_ssetm bli_ssetm_ex bli_ssetsc bli_ssetv bli_ssetv_ex bli_sshiftd bli_sshiftd_ex bli_ssqrtrsc bli_ssqrtsc bli_ssubd bli_ssubd_ex bli_ssubm bli_ssubm_ex bli_ssubsc bli_ssubv bli_ssubv_ex bli_ssumsqv bli_ssumsqv_ex bli_sswapv bli_sswapv_ex bli_ssxpbym_md bli_ssxpbym_md_ex bli_ssymm bli_ssymm_ex bli_ssymv bli_ssymv_ex bli_ssyr bli_ssyr2 bli_ssyr2_ex bli_ssyr2k bli_ssyr2k_ex bli_ssyr_ex bli_ssyrk bli_ssyrk_ex bli_strmm bli_strmm3 bli_strmm3_ex bli_strmm_ex bli_strmv bli_strmv_ex bli_strsm bli_strsm_ex bli_strsv bli_strsv_ex bli_subd bli_subd_ex bli_subm bli_subm_ex bli_subsc bli_subv bli_subv_ex bli_sumsqv bli_sumsqv_ex bli_sunzipsc bli_swapv bli_swapv_ex bli_sxpbyd bli_sxpbyd_ex bli_sxpbym bli_sxpbym_ex bli_sxpbyv bli_sxpbyv_ex bli_symm bli_symm_ex bli_symv bli_symv_ex bli_syr bli_syr2 bli_syr2_ex bli_syr2k bli_syr2k_ex bli_syr_ex bli_syrk bli_syrk_ex bli_szcastm bli_szcastnzm bli_szcastv bli_szcopysc bli_szipsc bli_szxpbym_md bli_szxpbym_md_ex bli_thrcomm_barrier bli_thrcomm_bcast bli_thread_get_ic_nt bli_thread_get_ir_nt bli_thread_get_jc_nt bli_thread_get_jr_nt bli_thread_get_num_threads bli_thread_get_pc_nt bli_thread_get_thread_impl bli_thread_get_thread_impl_str bli_thread_launch bli_thread_range_sub bli_thread_set_num_threads bli_thread_set_num_threads_ bli_thread_set_thread_impl bli_thread_set_ways bli_thread_set_ways_ bli_thrinfo_free bli_trmm bli_trmm3 bli_trmm3_ex bli_trmm_ex bli_trmv bli_trmv_ex bli_trsm bli_trsm_ex bli_trsm_ukernel bli_trsv bli_trsv_ex bli_unzipsc bli_xpbyd bli_xpbyd_ex bli_xpbym bli_xpbym_ex bli_xpbym_md bli_xpbym_md_ex bli_xpbyv bli_xpbyv_ex bli_zabsqsc bli_zaddd bli_zaddd_ex bli_zaddm bli_zaddm_ex bli_zaddsc bli_zaddv bli_zaddv_ex bli_zamaxv bli_zamaxv_ex bli_zasumv bli_zasumv_ex bli_zaxpbyv bli_zaxpbyv_ex bli_zaxpy2v bli_zaxpy2v_ex bli_zaxpyd bli_zaxpyd_ex bli_zaxpyf bli_zaxpyf_ex bli_zaxpym bli_zaxpym_ex bli_zaxpyv bli_zaxpyv_ex bli_zccastm bli_zccastnzm bli_zccastv bli_zccopysc bli_zcopyd bli_zcopyd_ex bli_zcopym bli_zcopym_ex bli_zcopyv bli_zcopyv_ex bli_zcxpbym_md bli_zcxpbym_md_ex bli_zdcastm bli_zdcastnzm bli_zdcastv bli_zdcopysc bli_zdivsc bli_zdotaxpyv bli_zdotaxpyv_ex bli_zdotv bli_zdotv_ex bli_zdotxaxpyf bli_zdotxaxpyf_ex bli_zdotxf bli_zdotxf_ex bli_zdotxv bli_zdotxv_ex bli_zdxpbym_md bli_zdxpbym_md_ex bli_zeqm bli_zeqsc bli_zeqv bli_zfprintm bli_zfprintv bli_zgemm bli_zgemm_ex bli_zgemmt bli_zgemmt_ex bli_zgemv bli_zgemv_ex bli_zger bli_zger_ex bli_zgetijm bli_zgetijv bli_zgetsc bli_zgtesc bli_zgtsc bli_zhemm bli_zhemm_ex bli_zhemv bli_zhemv_ex bli_zher bli_zher2 bli_zher2_ex bli_zher2k bli_zher2k_ex bli_zher_ex bli_zherk bli_zherk_ex bli_zinvertd bli_zinvertd_ex bli_zinvertsc bli_zinvertv bli_zinvertv_ex bli_zinvscald bli_zinvscald_ex bli_zinvscalm bli_zinvscalm_ex bli_zinvscalv bli_zinvscalv_ex bli_zipsc bli_zltesc bli_zltsc bli_zmachval bli_zmkherm bli_zmkherm_ex bli_zmksymm bli_zmksymm_ex bli_zmktrim bli_zmktrim_ex bli_zmulsc bli_znorm1m bli_znorm1m_ex bli_znorm1v bli_znorm1v_ex bli_znormfm bli_znormfm_ex bli_znormfsc bli_znormfv bli_znormfv_ex bli_znormim bli_znormim_ex bli_znormiv bli_znormiv_ex bli_zprintm bli_zprintv bli_zrandm bli_zrandm_ex bli_zrandnm bli_zrandnm_ex bli_zrandnv bli_zrandnv_ex bli_zrandv bli_zrandv_ex bli_zscal2d bli_zscal2d_ex bli_zscal2m bli_zscal2m_ex bli_zscal2v bli_zscal2v_ex bli_zscald bli_zscald_ex bli_zscalm bli_zscalm_ex bli_zscalv bli_zscalv_ex bli_zscastm bli_zscastnzm bli_zscastv bli_zscopysc bli_zsetd bli_zsetd_ex bli_zsetid bli_zsetid_ex bli_zsetijm bli_zsetijv bli_zsetm bli_zsetm_ex bli_zsetsc bli_zsetv bli_zsetv_ex bli_zshiftd bli_zshiftd_ex bli_zsqrtrsc bli_zsqrtsc bli_zsubd bli_zsubd_ex bli_zsubm bli_zsubm_ex bli_zsubsc bli_zsubv bli_zsubv_ex bli_zsumsqv bli_zsumsqv_ex bli_zswapv bli_zswapv_ex bli_zsxpbym_md bli_zsxpbym_md_ex bli_zsymm bli_zsymm_ex bli_zsymv bli_zsymv_ex bli_zsyr bli_zsyr2 bli_zsyr2_ex bli_zsyr2k bli_zsyr2k_ex bli_zsyr_ex bli_zsyrk bli_zsyrk_ex bli_ztrmm bli_ztrmm3 bli_ztrmm3_ex bli_ztrmm_ex bli_ztrmv bli_ztrmv_ex bli_ztrsm bli_ztrsm_ex bli_ztrsv bli_ztrsv_ex bli_zunzipsc bli_zxpbyd bli_zxpbyd_ex bli_zxpbym bli_zxpbym_ex bli_zxpbyv bli_zxpbyv_ex bli_zzcastm bli_zzcastnzm bli_zzcastv bli_zzcopysc bli_zzipsc bli_zzxpbym_md bli_zzxpbym_md_ex caxpby_ caxpy_ cblas_caxpby cblas_caxpy cblas_ccopy cblas_cdotc_sub cblas_cdotu_sub cblas_cgbmv cblas_cgemm cblas_cgemm3m cblas_cgemm_batch cblas_cgemmt cblas_cgemv cblas_cgerc cblas_cgeru cblas_chbmv cblas_chemm cblas_chemv cblas_cher cblas_cher2 cblas_cher2k cblas_cherk cblas_chpmv cblas_chpr cblas_chpr2 cblas_cscal cblas_csscal cblas_cswap cblas_csymm cblas_csyr2k cblas_csyrk cblas_ctbmv cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv cblas_dasum cblas_daxpby cblas_daxpy cblas_dcopy cblas_ddot cblas_dgbmv cblas_dgemm cblas_dgemm_batch cblas_dgemmt cblas_dgemv cblas_dger cblas_dnrm2 cblas_drot cblas_drotg cblas_drotm cblas_drotmg cblas_dsbmv cblas_dscal cblas_dsdot cblas_dspmv cblas_dspr cblas_dspr2 cblas_dswap cblas_dsymm cblas_dsymv cblas_dsyr cblas_dsyr2 cblas_dsyr2k cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_dzasum cblas_dznrm2 cblas_icamax cblas_idamax cblas_isamax cblas_izamax cblas_sasum cblas_saxpby cblas_saxpy cblas_scasum cblas_scnrm2 cblas_scopy cblas_sdot cblas_sdsdot cblas_sgbmv cblas_sgemm cblas_sgemm_batch cblas_sgemmt cblas_sgemv cblas_sger cblas_snrm2 cblas_srot cblas_srotg cblas_srotm cblas_srotmg cblas_ssbmv cblas_sscal cblas_sspmv cblas_sspr cblas_sspr2 cblas_sswap cblas_ssymm cblas_ssymv cblas_ssyr cblas_ssyr2 cblas_ssyr2k cblas_ssyrk cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm cblas_strsv cblas_xerbla cblas_zaxpby cblas_zaxpy cblas_zcopy cblas_zdotc_sub cblas_zdotu_sub cblas_zdscal cblas_zgbmv cblas_zgemm cblas_zgemm3m cblas_zgemm_batch cblas_zgemmt cblas_zgemv cblas_zgerc cblas_zgeru cblas_zhbmv cblas_zhemm cblas_zhemv cblas_zher cblas_zher2 cblas_zher2k cblas_zherk cblas_zhpmv cblas_zhpr cblas_zhpr2 cblas_zscal cblas_zswap cblas_zsymm cblas_zsyr2k cblas_zsyrk cblas_ztbmv cblas_ztbsv cblas_ztpmv cblas_ztpsv cblas_ztrmm cblas_ztrmv cblas_ztrsm cblas_ztrsv ccopy_ cdotc_ cdotcsub_ cdotu_ cdotusub_ cgbmv_ cgemm3m_ cgemm_ cgemm_batch_ cgemmt_ cgemv_ cgerc_ cgeru_ chbmv_ chemm_ chemv_ cher2_ cher2k_ cher_ cherk_ chpmv_ chpr2_ chpr_ crotg_ cscal_ csrot_ csscal_ cswap_ csymm_ csyr2k_ csyrk_ ctbmv_ ctbsv_ ctpmv_ ctpsv_ ctrmm_ ctrmv_ ctrsm_ ctrsv_ dasum_ dasumsub_ daxpby_ daxpy_ dcabs1_ dcopy_ ddot_ ddotsub_ dgbmv_ dgemm_ dgemm_batch_ dgemmt_ dgemv_ dger_ dnrm2_ dnrm2sub_ drot_ drotg_ drotm_ drotmg_ dsbmv_ dscal_ dsdot_ dsdotsub_ dspmv_ dspr2_ dspr_ dswap_ dsymm_ dsymv_ dsyr2_ dsyr2k_ dsyr_ dsyrk_ dtbmv_ dtbsv_ dtpmv_ dtpsv_ dtrmm_ dtrmv_ dtrsm_ dtrsv_ dzasum_ dzasumsub_ dznrm2_ dznrm2sub_ icamax_ icamaxsub_ idamax_ idamaxsub_ isamax_ isamaxsub_ izamax_ izamaxsub_ lsame_ sasum_ sasumsub_ saxpby_ saxpy_ scabs1_ scasum_ scasumsub_ scnrm2_ scnrm2sub_ scopy_ sdot_ sdotsub_ sdsdot_ sdsdotsub_ sgbmv_ sgemm_ sgemm_batch_ sgemmt_ sgemv_ sger_ snrm2_ snrm2sub_ srot_ srotg_ srotm_ srotmg_ ssbmv_ sscal_ sspmv_ sspr2_ sspr_ sswap_ ssymm_ ssymv_ ssyr2_ ssyr2k_ ssyr_ ssyrk_ stbmv_ stbsv_ stpmv_ stpsv_ strmm_ strmv_ strsm_ strsv_ xerbla_ xerbla_array_ zaxpby_ zaxpy_ zcopy_ zdotc_ zdotcsub_ zdotu_ zdotusub_ zdrot_ zdscal_ zgbmv_ zgemm3m_ zgemm_ zgemm_batch_ zgemmt_ zgemv_ zgerc_ zgeru_ zhbmv_ zhemm_ zhemv_ zher2_ zher2k_ zher_ zherk_ zhpmv_ zhpr2_ zhpr_ zrotg_ zscal_ zswap_ zsymm_ zsyr2k_ zsyrk_ ztbmv_ ztbsv_ ztpmv_ ztpsv_ ztrmm_ ztrmv_ ztrsm_ ztrsv_ blis-1.1/build/mirror-tree.sh000077500000000000000000000111001474157777200162270ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # print_usage() { local script_name # Get the script name script_name=${0##*/} # Echo usage info echo " " echo " "${script_name} echo " " echo " Field G. Van Zee" echo " " echo " Recursively descends through the directory given in argument 1 while" echo " creating a symmetric directory structure in the new directory specified" echo " by argument 2, ignoring regular files along the way." echo " " echo " Usage:" echo " ${script_name} [-v] existing_dir new_mirror_dir" echo " " echo " The following options are accepted:" echo " " echo " -v verbose" echo " Echo progress as directories are recursively created." echo " " # Exit with non-zero exit status exit 1 } main() { # Process our command line options. We only respond to the -v flag, # in which case we'll echo what we're doing as we go along. while getopts ":v" opt; do case $opt in v ) verbose_flag=1 ;; \? ) print_usage exit 1 esac done shift $(($OPTIND - 1)) # Check the number of arguments after command line option processing. if [ $# != "2" ]; then print_usage fi # Extract arguments. e_dir=$1 n_dir=$2 # If the root new directory does not exist, then create it. if [ ! -d $n_dir ]; then # Be verbose, if -v was one of the command line options. if [ -n "$verbose_flag" ]; then echo "Creating $n_dir" fi # Make the root new directory. Create the parent directories if # they do not exist with the -p option. mkdir -p $n_dir fi # Initialize the recursive variables. We keep a separate variable # for the existing and new directories because they have different # roots, but they will always change in parallel. cur_e_dir=$e_dir cur_n_dir=$n_dir # Begin recursion, starting with the contents of the existing # directory. mirror_tree "$(ls $e_dir)" # Exit peacefully. return 0 } mirror_tree() { # Extract arguments. dir_contents="$1" # Process each item in our argument list (ie: each item in cur_e_dir). for thing in ${dir_contents}; do # Adjust the current existing and new directory paths to # include the current instance of thing. cur_e_dir="$cur_e_dir/$thing" cur_n_dir="$cur_n_dir/$thing" # If the current existing directory exists, then create a # corresponding subdirectory in new directory. if [ -d ${cur_e_dir} ]; then # Be verbose, if -v was one of the command line options. if [ -n "$verbose_flag" ]; then echo "Creating $cur_n_dir" fi # Make the new subdirectory, but only if it doesn't # already exist. if [ ! -d $cur_n_dir ]; then mkdir $cur_n_dir fi # Continue recursively on the contents of cur_e_dir. mirror_tree "$(ls $cur_e_dir)" fi # Delete the end of the path, up to the first / character to # prepare for the next "thing" in $@. cur_e_dir=${cur_e_dir%/*} cur_n_dir=${cur_n_dir%/*} done } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/old/000077500000000000000000000000001474157777200142065ustar00rootroot00000000000000blis-1.1/build/old/flatten-headers-pass.sh000077500000000000000000000420061474157777200205610ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # -- Helper functions ---------------------------------------------------------- # print_usage() { # Echo usage info. echo " " echo " ${script_name}" echo " " echo " Field G. Van Zee" echo " " echo " Generate a monolithic header by recursively replacing all #include" echo " directives in a selected file with the contents of the header files" echo " they reference." echo " " echo " Usage:" echo " " echo " ${script_name} header header_out dir_list" echo " " echo " Arguments:" echo " " echo " header The filepath to the top-level header, which is file that" echo " will #include all other header files. NOTE: It is okay if" echo " this file resides somewhere in root_dir, described below." echo " " echo " header_out The filepath of the file into which the script will output" echo " the monolithic header." echo " " echo " dir_list The list of directory paths in which to search for the" echo " headers that are #included by 'header'. By default, these" echo " directories are scanned for .h files, but sub-directories" echo " within the various directories are not inspected. If the" echo " -r option is given, these directories are recursively" echo " scanned. In either case, the subset of directories scanned" echo " that actually contains .h files is then searched whenever" echo " a #include directive is encountered in 'header' (or any" echo " file subsequently #included). If a referenced header file" echo " is not found, the #include directive is left untouched and" echo " translated directly into 'header_out'." echo " " echo " The following options are accepted:" echo " " echo " -r recursive" echo " Scan the directories listed in 'dir_list' recursively when" echo " searching for .h header files. By default, the directories" echo " are not searched recursively." echo " " echo " -c strip C-style comments" echo " Strip comments enclosed in /* */ delimiters from the" echo " output, including multi-line comments. (This only applies" echo " to #included headers; C-style comments in the top-level" echo " 'header' are never stripped.) By default, C-style comments" echo " are not stripped." echo " " echo " -o SCRIPT output script name" echo " Use SCRIPT as a prefix when outputting messages instead" echo " the script's actual name. Useful when the current script" echo " is going to be called from within another, higher-level" echo " driver script and seeing the current script's name might" echo " unnecessarily confuse the user." echo " " echo " -q quiet" echo " Suppress informational output. By default, the script is" echo " verbose." echo " " echo " -h help" echo " Output this information and exit." echo " " # Exit with non-zero exit status exit 1 } canonicalize_ws() { local str="$1" # Remove leading and trailing whitespace. str=$(echo -e "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//') # Remove duplicate spaces between words. str=$(echo -e "${str}" | tr -s " ") # Update the input argument. echo "${str}" } is_word_in_list() { word="$1" list="$2" rval="" for item in ${list}; do if [ "${item}" == "${word}" ]; then rval="${word}" break fi done echo "${rval}" } echoinfo() { if [ -z "${quiet_flag}" ]; then # Echo the argument string to stderr instead of stdout. echo "${output_name}: $1" 1>&2; fi } echoninfo() { if [ -z "${quiet_flag}" ]; then # Echo the argument string to stderr instead of stdout. echo -n "${output_name}: $1" 1>&2; fi } echon2info() { if [ -z "${quiet_flag}" ]; then # Echo the argument string to stderr instead of stdout. echo "$1" 1>&2; fi } find_header_dirs() { local cur_dirpath sub_items result cur_list item child_list # Extract the argument: the current directory, and the list of # directories found so far that contain headers. cur_dirpath="$1" echoninfo "scanning contents of ${cur_dirpath}" # Acquire a list of the directory's contents. sub_items=$(ls ${cur_dirpath}) # If there is at least one header present, add the current directory to # the list header of directories. Otherwise, the current directory does # not contribute to the list returned to the caller. result=$(echo ${sub_items} | grep "\.h") if [ -n "${result}" ]; then cur_list="${cur_dirpath}" echon2info " ...found headers" else cur_list="" echon2info "" fi # Iterate over the list of directory contents. for item in ${sub_items}; do # Check whether the current item is in the ignore_list. If so, we # ignore it. result=$(is_word_in_list "${item}" "${ignore_list}") if [ -n "${result}" ]; then echoinfo "ignoring directory '${item}'." continue fi # If the current item is a directory, recursively accumulate header # directories for that sub-directory. if [ -d "${cur_dirpath}/${item}" ]; then # Recursively find header directories within the sub-directory # ${item} and store the directory list to child_list. child_list=$(find_header_dirs "${cur_dirpath}/${item}") # Accumulate the sub-directory's header list with the running list # of header directories cur_list="${cur_list} ${child_list}" fi done # Return the list of header directories. echo "${cur_list}" } get_header_path() { local filename dirpaths filepath filename="$1" dirpaths="$2" filepath="" # Search each directory path for the filename given. for dirpath in ${dirpaths}; do if [ -f "${dirpath}/${filename}" ]; then filepath="${dirpath}/${filename}" break fi done # Return the filepath that was found. Note that if no filepath was found # in the loop above, the empty string gets returned. echo "${filepath}" } replace_pass() { local filename dirpaths result header headerlist filename="$1" dirpaths="$2" # This string is inserted after #include directives after having # determined that they are not present in the directory tree and should # be ignored when assessing whether there are still #include directives # that need to be expanded. Note that it is formatted as a comment and # thus will be ignored when the monolithic header is eventually read C # preprocessor and/or compiler. skipstr="\/\/skipped" #skipstr="\/\*skipped\*\/" # The way we (optionally) remove C-style comments results in a single # blank line in its place (regardless of how many lines the comment # spanned. When a comment is removed, it is replaced by this string # so that the line can be deleted with a subsequent sed command. commstr="DeLeTeDCsTyLeCoMmEnT" headerlist="" # Iterate through each line of the header file, accumulating the names of # header files referenced in #include directives. while read -r curline do # Check whether the line begins with a #include directive, but ignore # the line if it contains the skip string. result=$(echo ${curline} | grep '^[[:space:]]*#include ' | grep -v "${skipstr}") # If the #include directive was found... if [ -n "${result}" ]; then # Isolate the header filename. We must take care to include all # characters that might appear between the "" or <>. header=$(echo ${curline} | sed -e "s/#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g") # Add the header file to a list. headerlist=$(canonicalize_ws "${headerlist} ${header}") fi done < "${filename}" echoinfo " found references to: ${headerlist}" # Initialize the return value to null. result="" # Iterate over each header file found in the previous loop. for header in ${headerlist}; do # Find the path to the header. header_filepath=$(get_header_path ${header} "${dirpaths}") # If the header has a slash, escape it so that sed doesn't get confused # (since we use '/' as our search-and-replace delimiter). header_esc=$(echo "${header}" | sed -e 's/\//\\\//g') # If the header file was not found, get_header_path() returns an # empty string. This probably means that the header file is a # system header and thus we skip it since we don't want to inline # the contents of system headers anyway. if [ -z "${header_filepath}" ]; then echoinfo " could not locate file '${header}'; marking to skip." # Insert a comment after the #include so we know to ignore it # later. Notice that we mimic the quotes or angle brackets # around the header name, whichever pair was used in the input. cat ${filename} \ | sed -e "s/^[[:space:]]*#include \([\"<]\)\(${header_esc}\)\([\">]\).*/#include \1\2\3 ${skipstr}/" \ > "${filename}.tmp" # Overwrite the original file with the updated copy. mv "${filename}.tmp" ${filename} else echoinfo " located file '${header_filepath}'; inserting." # Strip C-style comments from the file, if requested. if [ -n "${strip_comments}" ]; then header_filename=${header_filepath##*/} # Make a temporary copy of ${header_filepath} stripped of its # C-style comments. This leaves behind a single blank line, # which is then deleted. cat ${header_filepath} \ | perl -0777 -pe "s/\/\*.*?\*\//${commstr}/gs" \ | sed -e "/${commstr}/d" \ > "${header_filename}.tmp" header_to_insert="${header_filename}.tmp" else header_to_insert="${header_filepath}" fi # Replace the #include directive for the current header file with the # contents of that header file, saving the result to a temporary file. # We also insert begin and end markers to allow for more readability. cat ${filename} \ | sed -e "/^[[:space:]]*#include \"${header_esc}\"/ {" \ -e "i // begin ${header}" \ -e "r ${header_to_insert}" \ -e "a // end ${header}" \ -e "d" \ -e "}" \ > "${filename}.tmp" # Overwrite the original header file with the updated copy. mv "${filename}.tmp" ${filename} # If C-style comments were stripped, remove the temporary file. if [ -n "${strip_comments}" ]; then rm "${header_filename}.tmp" fi fi done # works, but leaves blank line: #cat "test.h" | sed -e "/^#include \"foo.h\"/r foo.h" -e "s///" > "test.new.h" # works: #cat "test.h" | sed -e '/^#include \"foo.h\"/ {' -e 'r foo.h' -e 'd' -e '}' > "test.new.h" # works: #cat "test.h" | sed -e '/^#include \"foo.h\"/r foo.h' -e '/^#include \"foo.h\"/d' > "test.new.h" #cat zorn/header.h | sed -e '/^#include \"header1.h\"/ {' -e 'i // begin insertion' -e 'r alice/header1.h' -e 'a // end insertion' -e 'd' -e '}' # Search the updated file for #include directives, but ignore any # hits that also contain the skip string (indicating that the header # file referenced by that #include could not be found). result=$(cat ${filename} | grep '^[[:space:]]*#include ' | grep -v "${skipstr}") # Return the result so the caller knows if we need to proceed with # another pass. echo ${result} } # # -- main function ------------------------------------------------------------- # main() { # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The script name to use in informational output. Defaults to ${script_name}. output_name=${script_name} # Whether or not we should strip C-style comments from the outout. (Default # is to not strip C-style comments.) strip_comments="" # Whether or not we search the directories in dir_list recursively. (Default # is to not search recursively.) recursive_flag="" # Whether or not we should suppress informational output. (Default is to # output messages.) quiet_flag="" # The list of directories to ignore ignore_list="old other temp test testsuite windows" # Process our command line options. while getopts ":o:rcqh" opt; do case $opt in o ) output_name=$OPTARG ;; r ) recursive_flag="1" ;; c ) strip_comments="1" ;; q ) quiet_flag="1" ;; h ) print_usage ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # Print usage if we don't have exactly two arguments. if [ $# != "3" ]; then print_usage fi # Acquire the two required arguments: # - the input header file, # - the output header file, # - the list of directories in which to search for the headers inputfile="$1" outputfile="$2" dir_list="$3" # First, confirm that the directories in dir_list are valid. dir_list2="" for item in ${dir_list}; do # Strip a trailing slash from the path, if it has one. item=${item%/} echoninfo "checking ${item} " if [ -d ${item} ]; then echon2info " ...directory exists." dir_list2="${dir_list2} ${item}" else echon2info " ...invalid directory; omitting." fi done dir_list2=$(canonicalize_ws "${dir_list2}") # Overwrite the original dir_list with the updated copy that omits # invalid directories. dir_list="${dir_list2}" echoinfo "check summary:" echoinfo " accessible directories:" echoinfo " ${dir_list}" # Generate a list of directories (dirpaths) which will be searched whenever # a #include directive is encountered. The method by which dirpaths is # compiled will depend on whether the recursive flag was given. if [ -n "${recursive_flag}" ]; then # If the recursive flag was given, we need to recursively scan each # directory in dir_list for directories with headers via the # function find_header_dirs(). dirpaths="" for item in ${dir_list}; do item_dirpaths=$(find_header_dirs ${item}) dirpaths="${dirpaths} ${item_dirpaths}" done dirpaths=$(canonicalize_ws "${dirpaths}") else # If the recursive flag was not given, we can just use dir_list # as-is, though we opt to filter out the directories that don't # contain .h files. dirpaths="" for item in ${dir_list}; do echoninfo "scanning ${item}" # Acquire a list of the directory's contents. sub_items=$(ls ${item}) # If there is at least one header present, add the current directory to # the list header of directories. result=$(echo ${sub_items} | grep "\.h") if [ -n "${result}" ]; then dirpaths="${dirpaths} ${item}" echon2info " ...found headers." else echon2info " ...no headers found." fi done dirpaths=$(canonicalize_ws "${dirpaths}") fi echoinfo "scan summary:" echoinfo " headers found in:" echoinfo " ${dirpaths}" echoinfo "preparing to monolithify '${inputfile}'." # Make a copy of the inputfile. cp ${inputfile} ${outputfile} echoinfo "new header will be saved to '${outputfile}'." done_flag="0" while [ ${done_flag} == "0" ]; do echoinfo "starting new pass." # Perform a replacement pass. The return string is non-null if # additional passes are necessary and null otherwise. result=$(replace_pass ${outputfile} "${dirpaths}") if [ -n "${result}" ]; then echoinfo "pass finished; result: additional pass(es) needed." else echoinfo "pass finished; result: no further passes needed." fi #exit 1 # If the return value was null, then we're done. if [ -z "${result}" ]; then done_flag="1" fi done echoinfo "substitution complete." echoinfo "monolithic header saved as '${outputfile}'." # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/old/regen-symbols.sh000077500000000000000000000063631474157777200173430ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name of copyright holder(s) nor the names # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # This script regenerates a list of symbols for use when building # Windows-compatible DLLs. We assume that this script will be run after # running configure as: # # ./configure --enable-cblas haswell # # and compiling BLIS normally. (Notice that we also prune out all # haswell/zen-related context initialization and reference kernels.) # libblis='lib/haswell/libblis.so' symfile='build/libblis-symbols.def' echo "EXPORTS" > def.exports #nm -g ${libblis} | grep -o " D BLIS_.*" | cut -f2- "-dD" > def.blis_const nm -g ${libblis} | grep -o " T bli_.*" | cut -f2- "-dT" > def.blis nm -g ${libblis} | grep -o " T bla_.*" | cut -f2- "-dT" > def.blis_bla nm -g ${libblis} | grep -o " T cblas_.*" | cut -f2- "-dT" > def.blis_cblas nm -g ${libblis} | grep -o " T s[acdgnrst].*" | cut -f2- "-dT" > def.blas_s nm -g ${libblis} | grep -o " T d[acdgnrstz].*" | cut -f2- "-dT" > def.blas_d nm -g ${libblis} | grep -o " T c[acdghrst].*" | cut -f2- "-dT" > def.blas_c nm -g ${libblis} | grep -o " T z[acdghrst].*" | cut -f2- "-dT" > def.blas_z nm -g ${libblis} | grep -o " T i[cdsz].*" | cut -f2- "-dT" > def.blas_i cat def.exports \ def.blis \ def.blis_bla \ def.blas_s \ def.blas_d \ def.blas_c \ def.blas_z \ def.blas_i \ def.blis_cblas \ | cut -f2- "-d " \ | grep -v init_haswell \ | grep -v haswell_ref \ | grep -v zen_ref \ > ${symfile} rm -f \ def.exports \ def.blis \ def.blis_bla \ def.blas_s \ def.blas_d \ def.blas_c \ def.blas_z \ def.blas_i \ def.blis_cblas blis-1.1/build/recu-sed.sh000077500000000000000000000332641474157777200155060ustar00rootroot00000000000000#!/bin/bash # # recursive-sed.sh # # Field G. Van Zee # print_usage() { # Echo usage info echo " " echo " "$script_name echo " " echo " Field G. Van Zee" echo " " echo " Recusively descend a directory tree and perform sed commands, either on" echo " the filename or the file contents, or both." echo " " echo " Usage:" echo " ${script_name} [options]" echo " " echo " The following options are accepted:" echo " " echo " -d " echo " Dry run. Go through all the motions, but don't actually" echo " apply any of the sed expressions to file names or contents." echo " -N " echo " Do not proceed recursively into subdirectories; consider" echo " only the files within the current directory. Default" echo " behavior is to act recursively." echo " -h " echo " Consider hidden files and directories. Default behavior is" echo " to ignore them." echo " -n " echo " Use svn mv instead of mv when renaming the file." echo " Notice that this only applies if the filename changes." echo " -p pattern " echo " Specifies the filename pattern, as would be given to the" echo " ls utility, to limit which files are affected. Default is" echo " the to consider all files present." echo " -r dir" echo " The root directory for the recursive action to be performed." echo " Default is to use the current working directory." echo " -v [0|1|2]" echo " verboseness level" echo " level 0: silent (no output)" echo " level 1: default (one line per directory; supress ls stderr)" echo " level 2: verbose (one line per directory; show ls stderr)" echo " " echo " At least one of the following option-argument pairs is required:" echo " " echo " -f sed_expr " echo " Specifies the sed expression that will be applied to the" echo " filenames of the files touched by the script. This expression" echo " must be a search-and-replace pattern." echo " -c sed_expr " echo " Specifies the sed expression that will be applied to the" echo " contents of the files touched by the script. This expression" echo " should be a search-and-replace pattern." echo " -s sed_script" echo " Specifies an arbitrary sed script that will be applied to the" echo " file contents of the files touched by the script." echo " " echo " Note: -c and -s options are mutually exclusive." echo " " # Exit with non-zero exit status exit 1 } perform_sed() { # Variables set by getopts. local exist_dir="$1" #echo "exist_dir: $exist_dir" # The suffix used to create temporary files local temp_file_suffix="sed_temp" # Check that exist_dir actually exists and is a directory if [ ! -d "${exist_dir}" ]; then echo "${script_name}: ${exist_dir} does not seem to be a valid directory." exit 1 fi # Check that the filename sed expression, if given, begins with an 's'. if [ -n "$filename_sed_expr" ]; then # If it's a valid search-and-replace expression, this should return an 's'. filename_sed_char=${filename_sed_expr%%/*} if [ "$filename_sed_char" != "s" ]; then echo "${script_name}: sed expression given with -f must be search-and-replace." exit 1 fi fi # Check that the sed script, if given, exists. if [ -n "$contents_sed_script" ]; then if [ ! -f ${contents_sed_script} ]; then echo "${script_name}: ${contents_sed_script} is not a regular file or does not exist." exit 1 fi fi # Assume that the sed expression is a search-and-replace. Extract the patterns # to match on. (Arbitrary sed expressions should be applied through a sed script.) if [ "$filename_sed_expr" != "" ]; then filename_sed_match=${filename_sed_expr#s/} filename_sed_match=${filename_sed_match%%/*} fi # Get the list of source files in the directory given. Supress stderr if # level 0 or 1 verbosity was requested. #if [ "$verbose_level" != "2" ]; then # old_filepaths=$(ls -d -b ${exist_dir}/${filename_pattern} 2> /dev/null) #else # old_filepaths="$(ls -d -b ${exist_dir}/${filename_pattern})" #fi #echo $old_filepaths #echo "$exist_dir/$filename_pattern" #for old_filepath in $old_filepaths; do #echo "exist_dir: $exist_dir" # Find all files that match the pattern in the current directory. find "${exist_dir}" -maxdepth 1 -name "${filename_pattern}" -print | while read old_filepath do #echo "old_filepath: $old_filepath" # Skip the current directory. if [ "${old_filepath}" == "${exist_dir}" ]; then continue fi # Skip any non-regular files. if [ ! -f "$old_filepath" ]; then # And say we are doing so if verboseness was requested. if [ "$verbose_level" = "2" ]; then echo "${script_name}: Ignoring $old_filepath" fi continue fi # Strip exist_dir from filename. old_filename=${old_filepath##*/} # Strip the filename from old_filepath to leave the directory path. old_dirpath=${old_filepath%/*} # Create a new filename from the old one. If a filename sed expression was given, # it will be applied now. if [ "$filename_sed_expr" != "" ]; then new_filename=$(echo "${old_filename}" | sed "${filename_sed_expr}") else new_filename="${old_filename}" fi #echo "new_filename: $new_filename" # Create the filepath to the new file location. new_filepath="${old_dirpath}/${new_filename}" #echo "new_filepath: $new_filepath" # Grep for the filename pattern within the filename of the current file. if [ "$filename_sed_expr" != "" ]; then grep_filename=$(echo "${old_filename}" | grep "${filename_sed_match}") fi # If we are not performing a dry run, proceed. if [ -z "$dry_run_flag" ]; then # Save the old file permissions so we can re-apply them to the # new file if its contents change (ie: if it's not just a 'mv', # which inherently preserves file permissions). old_perms=$(stat -c %a "${old_filepath}") # If the old and new filepaths are different, then we start off by # renaming the file. (Otherwise, if the old and new filepaths are # identical, then we don't need to do anything to the file.) If # the user requested that we use svn mv, then do that, otherwise we # use regular mv. if [ "${old_filepath}" != "${new_filepath}" ]; then if [ -n "$use_svn_mv_flag" ]; then svn mv "${old_filepath}" "${new_filepath}" else mv -f "${old_filepath}" "${new_filepath}" fi fi #else # A dry run still needs the act upon the "new" file, so if the # filepaths are different, simply set the new filepath to the # old one. (We won't need the previous value of new_filepath # anymore.) #if [ "${old_filepath}" != "${new_filepath}" ]; then # new_filepath="${old_filepath}" #fi fi # Handle the cases that might change the contents of the file. if [ "$contents_sed_expr" != "" ] || [ "$contents_sed_script" != "" ]; then # Execute the sed command based on whether the sed action was given # as a command line expression or a script residing in a file. if [ "$contents_sed_script" != "" ]; then # Perform the action, saving the result to a temporary file. cat "${new_filepath}" | sed -f ${contents_sed_script} \ > ${new_filepath}.${temp_file_suffix} elif [ "$contents_sed_expr" != "" ]; then # Perform the action, saving the result to a temporary file. cat "${new_filepath}" | sed -e "${contents_sed_expr}" \ > ${new_filepath}.${temp_file_suffix} fi # Check the difference. file_diff=$(diff "${new_filepath}" "${new_filepath}.${temp_file_suffix}") # If we are not performing a dry run, proceed. if [ -z "$dry_run_flag" ]; then # If the file contents change. if [ -n "$file_diff" ]; then # Apply the old file permissions to the new file (before we # potentially overwrite the old file with the new one). chmod ${old_perms} "${new_filepath}.${temp_file_suffix}" # Apply the file contents changes to the new filepath (which may # or may not be the same as the old filepath). mv -f "${new_filepath}.${temp_file_suffix}" "${new_filepath}" else # Otherwise remove the new temporary file since it is identical # to the original. rm -f "${new_filepath}.${temp_file_suffix}" fi else # Simply remove the file since we are only performing a dry run. rm -f "${new_filepath}.${temp_file_suffix}" fi fi # Check for dos2unix. If it's not here, we'll just substitute cat. #type_dos2unix=$(type -path dos2unix) #if [ -n "$type_dos2unix" ]; then # dos2unix -q ${new_filepath} #fi # Create a string that indicates what we are changing. We'll use this in # the verbose progress echo to indicate how the file is or would be changed. if [ -n "$grep_filename" ] && [ -n "$file_diff" ]; then which_matches="filename/contents" file_touched="yes" elif [ -n "$grep_filename" ] && [ -z "$file_diff" ]; then which_matches="filename " file_touched="yes" elif [ -z "$grep_filename" ] && [ -n "$file_diff" ]; then which_matches=" contents" file_touched="yes" else which_matches="" file_touched="no" fi # Be verbose, if requested, about which file we're looking at. if [ "$verbose_level" != "0" ]; then # But we only need to output a line if the file was touched. if [ "$file_touched" != "no" ]; then # Construct a relative filepath by stripping the initial root # directory so that the output does not span as many columns on # the terminal. rel_old_filepath=${old_filepath#${initial_root_dir}/} # Add a "dry run" condition to the output if we're doing a dry-run # so that the user knows we didn't really change anything. if [ -z "$dry_run_flag" ]; then echo "$script_name: Changing [${which_matches}] of ${rel_old_filepath}" else echo "$script_name: Changing (dry run) [${which_matches}] of ${rel_old_filepath}" fi fi fi done # Exit peacefully. return 0 } recursive_sed() { # Local variable declarations local item sub_items curr_dir this_dir # Extract our argument curr_dir="$1" # Call our function to perform the sed operations on the files in the # directory given. perform_sed "${curr_dir}" # If we were asked to act recursively, then continue processing # curr_dir's contents. if [ "$recursive_flag" = "1" ]; then # Get a listing of items in the directory according to the hidden # files/directories flag. if [ -n "$hidden_files_dirs_flag" ]; then # Get a listing of the directories in curr_dir (including hidden # files and directories). sub_items=$(ls -a "$curr_dir") else # Get a listing of the directories in curr_dir. sub_items=$(ls "$curr_dir") fi #echo "sub_items: $sub_items" # Descend into the contents of curr_dir, calling recursive_sed on # any items that are directories. find "${curr_dir}" -maxdepth 1 -name "*" -print | while read item do #echo "conisdering item: $item" # Skip the current directory. if [ "${item}" == "${curr_dir}" ]; then continue fi # If item is a directory, descend into it. if [ -d "$item" ]; then #echo "item is dir: $item" recursive_sed "$item" fi done fi # Return peacefully return 0 } main() { # Variables set by getopts. dry_run_flag="" hidden_files_dirs_flag="" use_svn_mv_flag="" filename_pattern="" root_dir="" initial_root_dir="" verbose_level="" filename_sed_expr="" contents_sed_expr="" contents_sed_script="" recursive_flag="1" # Get the script name script_name=${0##*/} # Local variable declarations. local item sub_items this_dir # Process our command line options. while getopts ":c:df:hp:r:s:nNv:" opt; do case $opt in d ) dry_run_flag="1" ;; h ) hidden_files_dirs_flag="1" ;; n ) use_svn_mv_flag="1" ;; N ) recursive_flag="0" ;; v ) verbose_level="$OPTARG" ;; p ) filename_pattern="$OPTARG" ;; r ) root_dir="$OPTARG" ;; f ) filename_sed_expr="$OPTARG" ;; c ) contents_sed_expr="$OPTARG" ;; s ) contents_sed_script="$OPTARG" ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # Make sure we've parsed all command line arguments by now. if [ $# != "0" ]; then echo "${script_name}: Unparsed command line arguments! Try running with no arguments for help." exit 1 fi # Make sure we received at least one of the required options. if [ -z "$filename_sed_expr" ] && [ -z "$contents_sed_expr" ] && [ -z "$contents_sed_script" ]; then print_usage fi # Make sure that both a file contents sed expression and sed script were # not given. if [ "$contents_sed_expr" != "" ] && [ "$contents_sed_script" != "" ] ; then echo "${script_name}: The -c and -s options may not be used at the same time." exit 1 fi # Make sure that verboseness level is valid. if [ "$verbose_level" != "0" ] && [ "$verbose_level" != "1" ] && [ "$verbose_level" != "2" ]; then verbose_level="1" fi # Prepare the filename pattern arguments to perform_sed(). if [ "$filename_pattern" = "" ] ; then filename_pattern='*' fi # Prepare the directory arguments to perform_sed(). if [ "$root_dir" != "" ] ; then # Strip / from end of directory paths, if there is one. root_dir=${root_dir%/} else root_dir=$PWD fi initial_root_dir=${root_dir} #echo "root_dir: $root_dir" # Begin recursing on the root directory. recursive_sed "$root_dir" # Exit peacefully return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-1.1/build/so_version000066400000000000000000000000061474157777200155350ustar00rootroot000000000000004 0.0 blis-1.1/build/templates/000077500000000000000000000000001474157777200154265ustar00rootroot00000000000000blis-1.1/build/templates/license.c000066400000000000000000000032741474157777200172220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-1.1/build/templates/license.h000066400000000000000000000032741474157777200172270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-1.1/build/templates/license.sh000066400000000000000000000033251474157777200174070ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, The University of Texas at Austin # Copyright (C) 2018, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # blis-1.1/build/version000066400000000000000000000000041474157777200150320ustar00rootroot000000000000001.1 blis-1.1/common.mk000066400000000000000000001354771474157777200141730ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once ifndef COMMON_MK_INCLUDED COMMON_MK_INCLUDED := yes # # --- CFLAGS storage functions ------------------------------------------------- # # Define a function that stores the value of a variable to a different # variable containing a specified suffix (corresponding to a configuration). define store-var-for $(strip $(1)).$(strip $(2)) := $($(strip $(1))) endef # Define a function similar to store-var-for, except that appends instead # of overwriting. define append-var-for $(strip $(1)).$(strip $(2)) += $($(strip $(1))) endef # Define a function that stores the value of all of the variables in a # make_defs.mk file to other variables with the configuration (the # argument $(1)) added as a suffix. This function is called once from # each make_defs.mk. Also, add the configuration to CONFIGS_INCL. define store-make-defs $(eval $(call store-var-for,CC, $(1))) $(eval $(call store-var-for,CC_VENDOR, $(1))) $(eval $(call store-var-for,CPPROCFLAGS,$(1))) $(eval $(call store-var-for,CLANGFLAGS, $(1))) $(eval $(call store-var-for,CXXLANGFLAGS,$(1))) $(eval $(call store-var-for,CMISCFLAGS, $(1))) $(eval $(call store-var-for,CPICFLAGS, $(1))) $(eval $(call store-var-for,CWARNFLAGS, $(1))) $(eval $(call store-var-for,CDBGFLAGS, $(1))) $(eval $(call store-var-for,COPTFLAGS, $(1))) $(eval $(call store-var-for,CKOPTFLAGS, $(1))) $(eval $(call store-var-for,CKVECFLAGS, $(1))) $(eval $(call store-var-for,CROPTFLAGS, $(1))) $(eval $(call store-var-for,CRVECFLAGS, $(1))) CONFIGS_INCL += $(1) endef # Define a function that retreives the value of a variable for a # given configuration. define load-var-for $($(strip $(1)).$(strip $(2))) endef # # --- CFLAGS query functions --------------------------------------------------- # # Define some functions that return the appropriate CFLAGS for a given # configuration. This assumes that the make_defs.mk files have already been # included, which results in those values having been stored to # configuration-qualified variables. get-noopt-cflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CDBGFLAGS,$(1)) \ $(call load-var-for,CWARNFLAGS,$(1)) \ $(call load-var-for,CPICFLAGS,$(1)) \ $(call load-var-for,CMISCFLAGS,$(1)) \ $(call load-var-for,CLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ $(CINCFLAGS) \ ) get-noopt-cxxflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CDBGFLAGS,$(1)) \ $(call load-var-for,CWARNFLAGS,$(1)) \ $(call load-var-for,CPICFLAGS,$(1)) \ $(call load-var-for,CMISCFLAGS,$(1)) \ $(call load-var-for,CXXLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ $(CXXTHREADFLAGS) \ $(CINCFLAGS) \ ) get-refinit-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ -DBLIS_CNAME=$(1) \ $(BUILD_ASANFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ -DBLIS_IN_REF_KERNEL=1 \ -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ ) get-refkern-cflags-for = $(strip $(call load-var-for,CROPTFLAGS,$(1)) \ $(call load-var-for,CRVECFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(COMPSIMDFLAGS) \ -DBLIS_CNAME=$(1) \ $(BUILD_ASANFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ -DBLIS_IN_REF_KERNEL=1 \ -include $(CONFIG_PATH)/$(1)/bli_kernel_defs_$(1).h \ ) get-config-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(BUILD_ASANFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-frame-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(BUILD_ASANFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-frame-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cxxflags-for,$(1)) \ $(BUILD_ASANFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-kernel-cflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ $(call load-var-for,CKVECFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) # When compiling addons, we use flags similar to those of general framework # source. This ensures that the same code can be linked and run across various # sub-configurations. get-addon-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(CADDONINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-addon-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cxxflags-for,$(1)) \ $(CADDONINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) # When compiling addon kernels, we use flags similar to those of kernels # flags, except we also include the addon header paths. get-addon-kernel-c99flags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ $(call load-var-for,CKVECFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(CADDONINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) # When compiling sandboxes, we use flags similar to those of general framework # source. This ensures that the same code can be linked and run across various # sub-configurations. (NOTE: If we ever switch to using refkernel or kernel # flags, we should prevent enabling sandboxes for umbrella families by verifying # that config_list == config_name if --enable-sandbox is given. THIS ALSO # APPLIES TO ADDONS ABOVE.) get-sandbox-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(CSANDINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-sandbox-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cxxflags-for,$(1)) \ $(CSANDINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) # Define a separate function that will return appropriate flags for use by # applications that want to use the same basic flags as those used when BLIS # was compiled. (NOTE: This is the same as the $(get-frame-cflags-for ...) # function, except that it omits a few variables that contain flags exclusively # for use when BLIS is being compiled/built: # - BUILD_CPPFLAGS, which contains a cpp macro that confirms that BLIS # is being built; # - BUILD_SYMFLAGS, which contains symbol export flags that are only # needed when a shared library is being compiled/linked; and # - BUILD_ASANFLAGS, which contains a flag that causes the compiler to # insert instrumentation for memory error detection. get-user-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ ) # Define functions that return messages appropriate for each non-verbose line # of compilation output. get-noopt-text = "(CFLAGS for no optimization)" get-refinit-text-for = "('$(1)' CFLAGS for ref. kernel init)" get-refkern-text-for = "('$(1)' CFLAGS for ref. kernels)" get-config-text-for = "('$(1)' CFLAGS for config code)" get-frame-text-for = "('$(1)' CFLAGS for framework code)" get-frame-cxxtext-for = "('$(1)' CXXFLAGS for framework code)" get-kernel-text-for = "('$(1)' CFLAGS for kernels)" get-addon-c99text-for = "('$(1)' CFLAGS for addons)" get-addon-cxxtext-for = "('$(1)' CXXFLAGS for addons)" get-addon-kernel-text-for = "('$(1)' CFLAGS for addon kernels)" get-sandbox-c99text-for = "('$(1)' CFLAGS for sandboxes)" get-sandbox-cxxtext-for = "('$(1)' CXXFLAGS for sandboxes)" # # --- Miscellaneous helper functions ------------------------------------------- # # Define functions that filters a list of filepaths $(1) that contain (or # omit) an arbitrary substring $(2). files-that-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),$(f),))) files-that-dont-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),,$(f)))) # Define a function that removes duplicate strings *without* using the sort # function. rm-dups = $(if $1,$(firstword $1) $(call rm-dups,$(filter-out $(firstword $1),$1))) # # --- Include makefile configuration file -------------------------------------- # # Use the current directory as the default path to the root directory for # makefile fragments (and the configuration family's make_defs.mk), but # allow the includer to override this value if it needs to point to an # installation directory. ifeq ($(strip $(SHARE_PATH)),) SHARE_PATH := . endif # Define the name of the configuration file. CONFIG_MK_FILE := config.mk # Identify the base path for the root directory for makefile fragments (and # the configuration family's make_defs.mk). We define this path in terms of # SHARE_PATH, which gets a default value above (which is what happens for the # top-level Makefile). If SHARE_PATH is specified by the Makefile prior to # including common.mk, that path is used instead. This allows Makefiles for # example code and test drivers to reference an installed prefix directory # for situations when the build directory no longer exists. BASE_SHARE_PATH := $(SHARE_PATH) # Include the configuration file. -include $(BASE_SHARE_PATH)/$(CONFIG_MK_FILE) # # --- Handle 'make clean' and friends without config.mk ------------------------ # # Detect whether we actually got the configuration file. If we didn't, then # it is likely that the user has not yet generated it (via configure). ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes) CONFIG_MK_PRESENT := yes IS_CONFIGURED := yes else CONFIG_MK_PRESENT := no IS_CONFIGURED := no endif # If we didn't get config.mk, then we need to set some basic variables so # that make will function without error for things like 'make clean'. ifeq ($(IS_CONFIGURED),no) # If this makefile fragment is being run and there is no config.mk present, # then it's probably safe to assume that the user is currently located in the # source distribution. DIST_PATH := . # Even though they won't be used explicitly, it appears that setting these # INSTALL_* variables to something sane (that is, not allowing them default # to the empty string) is necessary to prevent make from hanging, likely # because the statements that define UNINSTALL_LIBS and UNINSTALL_HEADERS, # when evaluated, result in running 'find' on the root directory--definitely # something we would like to avoid. INSTALL_LIBDIR := $(HOME)/blis/lib INSTALL_INCDIR := $(HOME)/blis/include INSTALL_SHAREDIR := $(HOME)/blis/share endif # # --- Primary makefile variable definitions ------------------------------------ # # Construct the architecture-version string, which will be used to name the # library upon installation. VERS_CONF := $(VERSION)-$(CONFIG_NAME) # All makefile fragments in the tree will have this name. FRAGMENT_MK := .fragment.mk # Locations of important files. BUILD_DIR := build CONFIG_DIR := config FRAME_DIR := frame REFKERN_DIR := ref_kernels KERNELS_DIR := kernels ADDON_DIR := addon SANDBOX_DIR := sandbox OBJ_DIR := obj LIB_DIR := lib INCLUDE_DIR := include BLASTEST_DIR := blastest TESTSUITE_DIR := testsuite VEND_DIR := vendor VEND_CPP_DIR := $(VEND_DIR)/cpp VEND_TESTCPP_DIR := $(VEND_DIR)/testcpp # The filename suffix for reference kernels. REFNM := ref # Source suffixes. CONFIG_SRC_SUFS := c KERNELS_SRC_SUFS := c s S ifneq ($(findstring hpx,$(THREADING_MODEL)),) FRAME_SRC_SUFS := c cpp else FRAME_SRC_SUFS := c endif ADDON_C99_SUFS := c ADDON_CXX_SUFS := cc cpp cxx ADDON_SRC_SUFS := $(ADDON_C99_SUFS) $(ADDON_CXX_SUFS) SANDBOX_C99_SUFS := c SANDBOX_CXX_SUFS := cc cpp cxx SANDBOX_SRC_SUFS := $(SANDBOX_C99_SUFS) $(SANDBOX_CXX_SUFS) # Header suffixes. FRAME_H99_SUFS := h FRAME_HDR_SUFS := $(FRAME_H99_SUFS) ADDON_H99_SUFS := h ADDON_HXX_SUFS := hh hpp hxx ADDON_HDR_SUFS := $(ADDON_H99_SUFS) $(ADDON_HXX_SUFS) SANDBOX_H99_SUFS := h SANDBOX_HXX_SUFS := hh hpp hxx SANDBOX_HDR_SUFS := $(SANDBOX_H99_SUFS) $(SANDBOX_HXX_SUFS) # Combine all header suffixes and remove duplicates via sort(). ALL_HDR_SUFS := $(sort $(FRAME_HDR_SUFS) \ $(ADDON_HDR_SUFS) \ $(SANDBOX_HDR_SUFS) ) ALL_H99_SUFS := $(sort $(FRAME_H99_SUFS) \ $(ADDON_H99_SUFS) \ $(SANDBOX_H99_SUFS) ) # The names of scripts that check output from the BLAS test drivers and # BLIS test suite. BLASTEST_CHECK := check-blastest.sh TESTSUITE_CHECK := check-blistest.sh # The names of the testsuite input/configuration files. TESTSUITE_CONF_GEN := input.general TESTSUITE_CONF_OPS := input.operations TESTSUITE_FAST_GEN := input.general.fast TESTSUITE_FAST_OPS := input.operations.fast TESTSUITE_MIXD_GEN := input.general.mixed TESTSUITE_MIXD_OPS := input.operations.mixed TESTSUITE_SALT_GEN := input.general.salt TESTSUITE_SALT_OPS := input.operations.salt TESTSUITE_OUT_FILE := output.testsuite # CHANGELOG file. CHANGELOG := CHANGELOG # Something for OS X so that echo -n works as expected. SHELL := bash # Construct paths to the four primary directories of source code: # the config directory, general framework code, reference kernel code, # and optimized kernel code. Also process paths for addon and sandbox # directories. CONFIG_PATH := $(DIST_PATH)/$(CONFIG_DIR) FRAME_PATH := $(DIST_PATH)/$(FRAME_DIR) REFKERN_PATH := $(DIST_PATH)/$(REFKERN_DIR) KERNELS_PATH := $(DIST_PATH)/$(KERNELS_DIR) ADDON_PATH := $(DIST_PATH)/$(ADDON_DIR) SANDBOX_PATH := $(DIST_PATH)/$(SANDBOX_DIR) BUILD_PATH := $(DIST_PATH)/$(BUILD_DIR) # Construct paths to some optional C++ template headers contributed by AMD. VEND_CPP_PATH := $(DIST_PATH)/$(VEND_CPP_DIR) VEND_TESTCPP_PATH := $(DIST_PATH)/$(VEND_TESTCPP_DIR) # Construct paths to the makefile fragments for the four primary directories # of source code: the config directory, general framework code, reference # kernel code, and optimized kernel code. CONFIG_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(CONFIG_DIR) FRAME_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(FRAME_DIR) REFKERN_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(REFKERN_DIR) KERNELS_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(KERNELS_DIR) ADDON_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(ADDON_DIR) SANDBOX_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(SANDBOX_DIR) # # --- Library name and local paths --------------------------------------------- # # Use lib/CONFIG_NAME as the default path to the local header files, but # allow the includer to override this value if it needs to point to an # installation directory. ifeq ($(strip $(LIB_PATH)),) LIB_PATH := $(LIB_DIR)/$(CONFIG_NAME) endif # Identify the base path for the intermediate library directory. We define # this path in terms of LIB_PATH, which gets a default value above (which is # what happens for the top-level Makefile). If LIB_PATH is specified by the # Makefile prior to including common.mk, that path is used instead. This # allows Makefiles for example code and test drivers to reference an installed # prefix directory for situations when the build directory no longer exists. BASE_LIB_PATH := $(LIB_PATH) # The base name of the BLIS library that we will build. LIBBLIS := libblis # The shared (dynamic) library file suffix is different for Linux and OS X. ifeq ($(OS_NAME),Darwin) SHLIB_EXT := dylib else ifeq ($(IS_WIN),yes) ifeq ($(IS_MSVC),no) SHLIB_EXT := dll.a else SHLIB_EXT := lib endif else SHLIB_EXT := so endif # Note: These names will be modified later to include the configuration and # version strings. LIBBLIS_A := $(LIBBLIS).a LIBBLIS_SO := $(LIBBLIS).$(SHLIB_EXT) # Append the base library path to the library names. LIBBLIS_A_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_A) LIBBLIS_SO_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SO) # Create a filepath to a local symlink to the soname--that is, the same as # LIBBLIS_SO_PATH except with the .so major version number. Since the shared # library lists its soname as 'libblis.so.n', where n is the .so major version # number, a symlink in BASE_LIB_PATH is needed so that ld can find the local # shared library when the testsuite is run via 'make test' or 'make check'. ifeq ($(OS_NAME),Darwin) # OS X shared library extensions. LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).$(SHLIB_EXT) LIBBLIS_SO_MMB_EXT := $(SO_MMB).$(SHLIB_EXT) else ifeq ($(IS_WIN),yes) # Windows shared library extension. LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).dll LIBBLIS_SO_MMB_EXT := else # Linux shared library extensions. LIBBLIS_SO_MAJ_EXT := $(SHLIB_EXT).$(SO_MAJOR) LIBBLIS_SO_MMB_EXT := $(SHLIB_EXT).$(SO_MMB) endif LIBBLIS_SONAME := $(LIBBLIS).$(LIBBLIS_SO_MAJ_EXT) LIBBLIS_SO_MAJ_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SONAME) # Construct the output path when building a shared library. # NOTE: This code and the code immediately above is a little curious and # perhaps could be refactored (carefully). ifeq ($(IS_WIN),yes) LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_MAJ_PATH) else LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_PATH) endif # # --- Utility program definitions ---------------------------------------------- # SH := /bin/sh MV := mv MKDIR := mkdir -p RM_F := rm -f RM_RF := rm -rf SYMLINK := ln -sf FIND := find GREP := grep EGREP := grep -E XARGS := xargs INSTALL := install -c DEVNULL := /dev/null # Script for creating a monolithic header file. #FLATTEN_H := $(DIST_PATH)/build/flatten-headers.sh FLATTEN_H := $(PYTHON) $(DIST_PATH)/build/flatten-headers.py # Default archiver flags. ARFLAGS := cr # Used to refresh CHANGELOG. GIT := git GIT_LOG := $(GIT) log --decorate # Define the locations of a script to generate a list of shared library symbols # within BLIS as well as the symbol file itself. GEN_SYMS := $(BUILD_PATH)/gen-libblis-symbols.sh SYM_FILE := $(BUILD_PATH)/libblis-symbols.def # # --- Default linker definitions ----------------------------------------------- # # NOTE: This section needs to reside before the inclusion of make_defs.mk # files (just below), as most configurations' make_defs.mk don't tinker # with things like LDFLAGS, but some do (or may), in which case they can # manually override whatever they need. # Define the external libraries we may potentially need at link-time. ifeq ($(IS_MSVC),yes) LIBM := else LIBM := -lm endif LIBMEMKIND := -lmemkind # Default linker flags. # NOTE: -lpthread is needed unconditionally because BLIS uses pthread_once() # to initialize itself in a thread-safe manner. The one exception to this # rule: if --disable-system is given at configure-time, LIBPTHREAD is empty. LDFLAGS := $(LDFLAGS_PRESET) $(LIBM) $(LIBPTHREAD) # Add libmemkind to the link-time flags, if it was enabled at configure-time. ifeq ($(MK_ENABLE_MEMKIND),yes) LDFLAGS += $(LIBMEMKIND) endif # Never use libm with Intel compilers. ifeq ($(CC_VENDOR),icc) LDFLAGS := $(filter-out $(LIBM),$(LDFLAGS)) endif # Never use libmemkind with Intel SDE. ifeq ($(DEBUG_TYPE),sde) LDFLAGS := $(filter-out $(LIBMEMKIND),$(LDFLAGS)) endif # If AddressSanitizer is enabled, add the compiler flag to LDFLAGS. ifeq ($(MK_ENABLE_ASAN),yes) LDFLAGS += -fsanitize=address endif # Specify the shared library's 'soname' field. # NOTE: The flag for creating shared objects is different for Linux and OS X. ifeq ($(OS_NAME),Darwin) # OS X shared library link flags. SOFLAGS := -dynamiclib ifeq ($(MK_ENABLE_RPATH),yes) SOFLAGS += -Wl,-install_name,@rpath/$(LIBBLIS_SONAME) else SOFLAGS += -Wl,-install_name,$(libdir)/$(LIBBLIS_SONAME) endif else SOFLAGS := -shared ifeq ($(IS_WIN),yes) # Windows shared library link flags. ifeq ($(IS_MSVC),yes) SOFLAGS += -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib else SOFLAGS += -Wl,--out-implib,$(BASE_LIB_PATH)/$(LIBBLIS).dll.a endif else # Linux shared library link flags. SOFLAGS += -Wl,-soname,$(LIBBLIS_SONAME) endif endif # Decide which library to link to for things like the testsuite and BLIS test # drivers. We default to the static library, unless only the shared library was # enabled, in which case we use the shared library. LIBBLIS_L := $(LIBBLIS_A) LIBBLIS_LINK := $(LIBBLIS_A_PATH) ifeq ($(MK_ENABLE_SHARED),yes) ifeq ($(MK_ENABLE_STATIC),no) LIBBLIS_L := $(LIBBLIS_SO) LIBBLIS_LINK := $(LIBBLIS_SO_PATH) ifeq ($(IS_WIN),no) # For Linux and OS X: set rpath property of shared object. ifeq ($(OS_NAME),Darwin) # rpath for test_libblis.x LDFLAGS += -Wl,-rpath,@executable_path/$(BASE_LIB_PATH) # rpath for BLAS tests LDFLAGS += -Wl,-rpath,@executable_path/../../../$(BASE_LIB_PATH) else # rpath for test_libblis.x LDFLAGS += -Wl,-rpath,'$$ORIGIN/$(BASE_LIB_PATH)' # rpath for BLAS tests LDFLAGS += -Wl,-rpath,'$$ORIGIN/../../../$(BASE_LIB_PATH)' endif endif endif # On windows, use the shared library even if static is created. ifeq ($(IS_WIN),yes) LIBBLIS_L := $(LIBBLIS_SO) LIBBLIS_LINK := $(LIBBLIS_SO_PATH) endif endif # # --- Include makefile definitions file ---------------------------------------- # # Define the name of the file containing build and architecture-specific # makefile definitions. MAKE_DEFS_FILE := make_defs.mk # Assemble a list of all configuration family members, including the # configuration family name itself. Note that sort() will remove duplicates # for situations where CONFIG_NAME is present in CONFIG_LIST, such as would # be the case for singleton families. CONFIG_LIST_FAM := $(sort $(strip $(CONFIG_LIST) $(CONFIG_NAME))) # Construct the paths to the makefile definitions files, each of which # resides in a separate configuration sub-directory. We use CONFIG_LIST_FAM # since we might need the makefile definitions associated with the # configuration family (if it is an umbrella family). # NOTE: We use the prefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/ instead of # $(CONFIG_PATH) so that make_defs.mk can be found when it is installed, # provided the caller defined SHARE_PATH to that install directory. CONFIG_PATHS := $(addprefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/, \ $(CONFIG_LIST_FAM)) MAKE_DEFS_MK_PATHS := $(addsuffix /$(MAKE_DEFS_FILE), $(CONFIG_PATHS)) # Initialize the list of included (found) configurations to empty. CONFIGS_INCL := # Include the makefile definitions files implied by the list of configurations. -include $(MAKE_DEFS_MK_PATHS) # Detect whether we actually got all of the make definitions files. If # we didn't, then maybe a configuration is mislabeled or missing. The # check-env-make-defs target checks ALL_MAKE_DEFS_MK_PRESENT and outputs # an error message if it is set to 'no'. # NOTE: We use CONFIG_LIST_FAM as the expected list of configurations. # This combines CONFIG_NAME with CONFIG_LIST. The inclusion of CONFIG_NAME # is needed for situations where the configuration family is an umbrella # family (e.g. 'intel64'), since families have separate make_def.mk files. CONFIGS_EXPECTED := $(CONFIG_LIST_FAM) ifeq ($(sort $(strip $(CONFIGS_INCL))), \ $(sort $(strip $(CONFIGS_EXPECTED)))) ALL_MAKE_DEFS_MK_PRESENT := yes else ALL_MAKE_DEFS_MK_PRESENT := no endif # # --- Configuration-agnostic flags --------------------------------------------- # # --- Linker program --- # Use whatever compiler was chosen. A C++ compiler must be used if HPX is enabled. ifneq ($(findstring hpx,$(THREADING_MODEL)),) LINKER := $(CXX) else LINKER := $(CC) endif # --- Warning flags --- CWARNFLAGS := # Disable unused function warnings and stop compiling on first error for # all compilers that accept such options: gcc, clang, and icc. ifneq ($(CC_VENDOR),ibm) ifneq ($(CC_VENDOR),nvc) CWARNFLAGS += -Wall -Wno-unused-function -Wfatal-errors else CWARNFLAGS += -Wall -Wno-unused-function endif endif # Disable tautological comparision warnings in clang. ifeq ($(CC_VENDOR),clang) CWARNFLAGS += -Wno-tautological-compare -Wno-pass-failed endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CWARNFLAGS,$(c)))) # --- Position-independent code flags (shared libraries only) --- # Note: Avoid -fPIC flags for Windows builds since all code is position- # independent. ifeq ($(IS_MSVC),yes) CPICFLAGS := endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call store-var-for,CPICFLAGS,$(c)))) # --- Symbol exporting flags (shared libraries only) --- ifeq ($(MK_ENABLE_SHARED),yes) # NOTE: These flags are only applied when building BLIS and not used by # applications that import BLIS compilation flags via the # $(get-user-cflags-for ...) function. # Determine default export behavior / visibility of symbols for gcc. ifeq ($(CC_VENDOR),gcc) ifeq ($(IS_WIN),yes) ifeq ($(EXPORT_SHARED),all) BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import else # ifeq ($(EXPORT_SHARED),public) BUILD_SYMFLAGS := -Wl,--exclude-all-symbols endif else # ifeq ($(IS_WIN),no) ifeq ($(EXPORT_SHARED),all) # Export all symbols by default. BUILD_SYMFLAGS := -fvisibility=default else # ifeq ($(EXPORT_SHARED),public) # Hide all symbols by default and export only those that have been annotated # as needing to be exported. BUILD_SYMFLAGS := -fvisibility=hidden endif endif endif # Determine default export behavior / visibility of symbols for icc. # NOTE: The Windows branches have been omitted since we currently make no # effort to support Windows builds via icc (only gcc/clang via AppVeyor). ifeq ($(CC_VENDOR),icc) ifeq ($(EXPORT_SHARED),all) # Export all symbols by default. BUILD_SYMFLAGS := -fvisibility=default else # ifeq ($(EXPORT_SHARED),public) # Hide all symbols by default and export only those that have been annotated # as needing to be exported. BUILD_SYMFLAGS := -fvisibility=hidden endif endif # Determine default export behavior / visibility of symbols for clang. ifeq ($(CC_VENDOR),clang) ifeq ($(IS_WIN),yes) ifeq ($(IS_MSVC),no) # This is a clang build targetting MinGW-w64 env ifeq ($(EXPORT_SHARED),all) BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import else # ifeq ($(EXPORT_SHARED),all) BUILD_SYMFLAGS := -Wl,--exclude-all-symbols endif endif # ifeq ($(IS_MSVC),no) ifeq ($(EXPORT_SHARED),all) # NOTE: clang on Windows does not appear to support exporting all symbols # by default, and therefore we ignore the value of EXPORT_SHARED. BUILD_SYMFLAGS := else # ifeq ($(EXPORT_SHARED),public) # NOTE: The default behavior of clang on Windows is to hide all symbols # and only export functions and other declarations that have beenannotated # as needing to be exported. BUILD_SYMFLAGS := endif else # ifeq ($(IS_WIN),no) ifeq ($(EXPORT_SHARED),all) # Export all symbols by default. BUILD_SYMFLAGS := -fvisibility=default else # ifeq ($(EXPORT_SHARED),public) # Hide all symbols by default and export only those that have been annotated # as needing to be exported. BUILD_SYMFLAGS := -fvisibility=hidden endif endif endif else #ifeq ($(MK_ENABLE_SHARED),no) # Don't modify CPICFLAGS for the various configuration family members. # Don't use any special symbol export flags. BUILD_SYMFLAGS := endif # --- Language flags --- # Enable C99. CLANGFLAGS := -std=c99 $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CLANGFLAGS,$(c)))) # Enable C++11, or C++17 if HPX threading is enabled. ifneq ($(findstring hpx,$(THREADING_MODEL)),) CXXLANGFLAGS := -std=c++17 else CXXLANGFLAGS := -std=c++11 endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CXXLANGFLAGS,$(c)))) # --- C Preprocessor flags --- # Enable clock_gettime() in time.h. CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L # Enable ip_mreq on macOS which is needed for ASIO which is needed for HPX. ifeq ($(OS_NAME),Darwin) CPPROCFLAGS += -D_DARWIN_C_SOURCE endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CPPROCFLAGS,$(c)))) # --- AddressSanitizer flags --- ifeq ($(MK_ENABLE_ASAN),yes) BUILD_ASANFLAGS := -fsanitize=address else BUILD_ASANFLAGS := endif # --- Threading flags --- # NOTE: We don't have to explicitly omit -pthread when --disable-system is given # since that option forces --enable-threading=single, and thus -pthread never # gets added to begin with. CTHREADFLAGS := CXXTHREADFLAGS := ifeq ($(CC_VENDOR),gcc) #ifneq ($(findstring auto,$(THREADING_MODEL)),) #THREADING_MODEL := openmp #endif ifneq ($(findstring openmp,$(THREADING_MODEL)),) CTHREADFLAGS += -fopenmp LDFLAGS += -fopenmp endif ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif endif ifeq ($(CC_VENDOR),icc) #ifneq ($(findstring auto,$(THREADING_MODEL)),) #THREADING_MODEL := openmp #endif ifneq ($(findstring openmp,$(THREADING_MODEL)),) CTHREADFLAGS += -fopenmp LDFLAGS += -fopenmp endif ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif endif ifeq ($(CC_VENDOR),clang) #ifneq ($(findstring auto,$(THREADING_MODEL)),) #THREADING_MODEL := pthreads #endif ifneq ($(findstring openmp,$(THREADING_MODEL)),) CTHREADFLAGS += -fopenmp LDFLAGS += -fopenmp endif ifneq ($(findstring pthreads,$(THREADING_MODEL)),) CTHREADFLAGS += -pthread LDFLAGS += $(LIBPTHREAD) endif endif # Threading flags for HPX. ifneq ($(findstring hpx,$(THREADING_MODEL)),) HPX_CXXFLAGS := $(shell pkg-config --cflags hpx_component) HPX_LDFLAGS := $(filter-out -shared,$(shell pkg-config --libs hpx_component)) CTHREADFLAGS += $(filter-out -std=%,$(HPX_CXXFLAGS)) LDFLAGS += $(HPX_LDFLAGS) ifeq ($(OS_NAME),Darwin) RPATH_PREFIX := -Wl,-rpath, LDFLAGS += $(patsubst -L%,$(RPATH_PREFIX)%,$(filter -L%,$(HPX_LDFLAGS))) endif endif # --- #pragma omp simd flags (used for reference kernels only) --- ifeq ($(PRAGMA_OMP_SIMD),yes) ifeq ($(CC_VENDOR),gcc) COMPSIMDFLAGS := -fopenmp-simd else ifeq ($(CC_VENDOR),clang) COMPSIMDFLAGS := -fopenmp-simd else ifeq ($(CC_VENDOR),icc) COMPSIMDFLAGS := -qopenmp-simd endif endif endif else # ifeq ($(PRAGMA_OMP_SIMD),no) COMPSIMDFLAGS := endif # # --- Adjust verbosity level manually using make V=[0,1] ----------------------- # ifeq ($(V),1) ENABLE_VERBOSE := yes BLIS_ENABLE_TEST_OUTPUT := yes endif ifeq ($(V),0) ENABLE_VERBOSE := no BLIS_ENABLE_TEST_OUTPUT := no endif # # --- Append OS-specific libraries to LDFLAGS ---------------------------------- # ifeq ($(OS_NAME),Linux) # Exclude -lrt on Android by detecting Bionic. # $(CC) -E bionic.h returns a "bionic" substring iff Bionic is detected. BIONIC_H_PATH := $(DIST_PATH)/build/detect/android/bionic.h BIONIC := $(findstring bionic,$(shell $(CC) -E $(BIONIC_H_PATH))) ifeq (,$(BIONIC)) LDFLAGS += -lrt endif endif # # --- LDFLAGS cleanup ---------------------------------------------------------- # # # --- Include makefile fragments ----------------------------------------------- # # Initialize our list of directory paths to makefile fragments with the empty # list. This variable will accumulate all of the directory paths in which # makefile fragments reside. FRAGMENT_DIR_PATHS := # Initialize our makefile variables that source code files will be accumulated # into by the makefile fragments. This initialization is very important! These # variables will end up with weird contents if we don't initialize them to # empty prior to recursively including the makefile fragments. MK_CONFIG_SRC := MK_KERNELS_SRC := MK_REFKERN_SRC := MK_FRAME_SRC := MK_ADDON_SRC := MK_SANDBOX_SRC := # -- config -- # Construct paths to each of the sub-configurations specified in the # configuration list. Note that we use CONFIG_LIST_FAM, which already # has CONFIG_NAME included (with duplicates removed). CONFIG_PATHS := $(addprefix $(CONFIG_FRAG_PATH)/, $(CONFIG_LIST_FAM)) # This variable is used by the include statements as they recursively include # one another. For the 'config' directory, we initialize it to that directory # in preparation to include the fragments in the configuration sub-directory. PARENT_SRC_PATH := $(CONFIG_PATH) PARENT_PATH := $(CONFIG_FRAG_PATH) # Recursively include the makefile fragments in each of the sub-configuration # directories. -include $(addsuffix /$(FRAGMENT_MK), $(CONFIG_PATHS)) # -- kernels -- # Construct paths to each of the kernel sets required by the sub-configurations # in the configuration list. KERNEL_PATHS := $(addprefix $(KERNELS_FRAG_PATH)/, $(KERNEL_LIST)) # This variable is used by the include statements as they recursively include # one another. For the 'kernels' directory, we initialize it to that directory # in preparation to include the fragments in the configuration sub-directory. PARENT_SRC_PATH := $(KERNELS_PATH) PARENT_PATH := $(KERNELS_FRAG_PATH) # Recursively include the makefile fragments in each of the kernels sub- # directories. -include $(addsuffix /$(FRAGMENT_MK), $(KERNEL_PATHS)) # -- ref_kernels -- # -- frame -- # This variable is used by the include statements as they recursively include # one another. For the framework and reference kernel source trees (ie: the # 'frame' and 'ref_kernels' directories), we initialize it to the top-level # directory since that is its parent. PARENT_SRC_PATH := $(DIST_PATH) PARENT_PATH := $(OBJ_DIR)/$(CONFIG_NAME) # Recursively include all the makefile fragments in the directories for the # reference kernels and portable framework. -include $(addsuffix /$(FRAGMENT_MK), $(REFKERN_FRAG_PATH)) -include $(addsuffix /$(FRAGMENT_MK), $(FRAME_FRAG_PATH)) # -- addon -- # Construct paths to each addon. # NOTE: If $(ADDON_LIST) is empty (because no addon was enabled at configure- # time) then $(ADDON_PATHS) will also be empty, which will cause no fragments # to be included. ADDON_PATHS := $(addprefix $(ADDON_FRAG_PATH)/, $(ADDON_LIST)) # This variable is used by the include statements as they recursively include # one another. For the 'addons' directory, we initialize it to that directory # in preparation to include the fragments in the configuration sub-directory. PARENT_SRC_PATH := $(ADDON_PATH) PARENT_PATH := $(ADDON_FRAG_PATH) # Recursively include the makefile fragments in each of the addons sub- # directories. -include $(addsuffix /$(FRAGMENT_MK), $(ADDON_PATHS)) # -- sandbox -- # Construct paths to each sandbox. (At present, there can be only one.) # NOTE: If $(SANDBOX) is empty (because no sandbox was enabled at configure- # time) then $(SANDBOX_PATHS) will also be empty, which will cause no # fragments to be included. SANDBOX_PATHS := $(addprefix $(SANDBOX_FRAG_PATH)/, $(SANDBOX)) # This variable is used by the include statements as they recursively include # one another. For the 'sandbox' directory, we initialize it to that directory # in preparation to include the fragments in the configuration sub-directory. PARENT_SRC_PATH := $(SANDBOX_PATH) PARENT_PATH := $(SANDBOX_FRAG_PATH) # Recursively include the makefile fragments in the sandbox sub-directory. -include $(addsuffix /$(FRAGMENT_MK), $(SANDBOX_PATHS)) # -- post-processing -- # Create a list of the makefile fragments using the variable into which each # of the above include statements accumulated their directory paths. MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS)) # Detect whether we actually got any makefile fragments. If we didn't, then it # is likely that the user has not yet generated them (via configure). ifeq ($(strip $(MAKEFILE_FRAGMENTS)),) MAKEFILE_FRAGMENTS_PRESENT := no else MAKEFILE_FRAGMENTS_PRESENT := yes endif # # --- Important sets of header files and paths --------------------------------- # # Define a function that will expand all of the directory paths given in $(1) # to actual filepaths using the list of suffixes provided in $(2). get-filepaths = $(strip $(foreach path, $(1), \ $(foreach suf, $(2), \ $(wildcard $(path)/*.$(suf)) \ ) ) ) # Define a function that will expand all of the directory paths given in $(1) # to actual filepaths using the list of suffixes provided in $(2), taking only # the first expansion from each directory with at least one file matching # the current suffix. Finally, strip the filenames from all resulting files, # returning only the directory paths. get-dirpaths = $(dir $(foreach path, $(1), \ $(firstword \ $(foreach suf, $(2), \ $(wildcard $(path)/*.$(suf)) \ ) ) ) ) # We'll use three directory lists. The first is a list of all of the directories # in which makefile fragments were generated, plus the current directory. (The # current directory is needed so we include bli_config.h and bli_addon.h in the # processing of header files.) The second and third are subsets of the first # that begins with the addon and sandbox root paths, respectively. ALLFRAG_DIR_PATHS := . $(FRAGMENT_DIR_PATHS) ADDON_DIR_PATHS := $(filter $(ADDON_PATH)/%,$(ALLFRAG_DIR_PATHS)) SANDBOX_DIR_PATHS := $(filter $(SANDBOX_PATH)/%,$(ALLFRAG_DIR_PATHS)) ALL_H99_FILES := $(call get-filepaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) FRAME_H99_FILES := $(filter-out $(ADDON_PATH)/%, \ $(filter-out $(SANDBOX_PATH)/%, \ $(ALL_H99_FILES) \ ) ) ALL_H99_DIRPATHS := $(call get-dirpaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) ADDON_H99_FILES := $(call get-filepaths,$(ADDON_DIR_PATHS),$(ADDON_H99_SUFS)) ADDON_HXX_FILES := $(call get-filepaths,$(ADDON_DIR_PATHS),$(ADDON_HXX_SUFS)) ADDON_HDR_DIRPATHS := $(call get-dirpaths,$(ADDON_DIR_PATHS),$(ALL_HDR_SUFS)) SANDBOX_H99_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_H99_SUFS)) SANDBOX_HXX_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_HXX_SUFS)) SANDBOX_HDR_DIRPATHS := $(call get-dirpaths,$(SANDBOX_DIR_PATHS),$(ALL_HDR_SUFS)) # # --- blis.h header definitions ------------------------------------------------ # # Use include/CONFIG_NAME as the default path to the local header files, but # allow the includer to override this value if it needs to point to an # installation directory. ifeq ($(strip $(INC_PATH)),) INC_PATH := $(INCLUDE_DIR)/$(CONFIG_NAME) endif # Identify the base path for the intermediate include directory. We define # this path in terms of INC_PATH, which gets a default value above (which is # what happens for the top-level Makefile). If INC_PATH is specified by the # Makefile prior to including common.mk, that path is used instead. This # allows Makefiles for example code and test drivers to reference an installed # prefix directory for situations when the build directory no longer exists. BASE_INC_PATH := $(INC_PATH) # Isolate the path to blis.h by filtering the file from the list of framework # header files. BLIS_H := blis.h BLIS_H_SRC_PATH := $(filter %/$(BLIS_H), $(FRAME_H99_FILES)) # Construct the path to what will be the intermediate flattened/monolithic # blis.h file. BLIS_H_FLAT := $(BASE_INC_PATH)/$(BLIS_H) # Construct the path to the helper blis.h file that will reside one directory # up from the installed copy of blis.h. HELP_BLIS_H_PATH := $(BUILD_DIR)/$(BLIS_H) # # --- cblas.h header definitions ----------------------------------------------- # # Isolate the path to cblas.h by filtering the file from the list of framework # header files, and then strip the filename to obtain the directory in which # cblas.h resides. CBLAS_H := cblas.h CBLAS_H_SRC_PATH := $(filter %/$(CBLAS_H), $(FRAME_H99_FILES)) CBLAS_H_DIRPATH := $(dir $(CBLAS_H_SRC_PATH)) # Construct the path to what will be the intermediate flattened/monolithic # cblas.h file. CBLAS_H_FLAT := $(BASE_INC_PATH)/$(CBLAS_H) # Construct the path to the helper cblas.h file that will reside one directory # up from the installed copy of cblas.h. HELP_CBLAS_H_PATH := $(BUILD_DIR)/$(CBLAS_H) # # --- Compiler include path definitions ---------------------------------------- # # Obtain a list of header files #included inside of the bli_cntx_ref.c file. # Due to the way that bli_cntx_ref.c uses headers and macros, paths to these # files will be needed when compiling bli_cntx_ref.c with the monolithic header. ifeq ($(strip $(SHARE_PATH)),.) REF_KER_SRC := $(DIST_PATH)/$(REFKERN_DIR)/bli_cntx_ref.c # # NOTE: A redirect to /dev/null has been added to the grep command below because # as of version 3.8, grep outputs warnings when encountering stray backslashes # in regular expressions [1]. Versions older than 3.8 not only do not complain, # but actually seem to *require* the backslash, perhaps because of the way we # are invoking grep via GNU make's shell command. WHEN DEBUGGING ANYTHING # INVOLVING THE MAKE VARIABLE BELOW, PLEASE CONSIDER TEMPORARILY REMOVING THE # REDIRECT TO /dev/null SO THAT YOU SEE ANY MESSAGES SENT TO STANDARD ERROR. # # [1] https://lists.gnu.org/archive/html/info-gnu/2022-09/msg00001.html # REF_KER_HEADERS := $(shell $(GREP) "\#include" $(REF_KER_SRC) 2> $(DEVNULL) | sed -e "s/\#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g" | $(GREP) -v $(BLIS_H)) endif # Match each header found above with the path to that header, and then strip # leading, trailing, and internal whitespace. REF_KER_H_PATHS := $(call rm-dups,$(strip \ $(foreach header, $(REF_KER_HEADERS), \ $(dir $(filter %/$(header), \ $(FRAME_H99_FILES)))))) # Add -I to each header path so we can specify our include search paths to the # C compiler. Then add frame/include since it's needed when compiling source # files that #include bli_oapi_ba.h or bli_oapi_ex.h. REF_KER_I_PATHS := $(strip $(patsubst %, -I%, $(REF_KER_H_PATHS))) REF_KER_I_PATHS += -I$(DIST_PATH)/frame/include # Prefix the paths above with the base include path. # NOTE: We no longer need every header path in the source tree since we # now #include the monolithic/flattened blis.h instead. CINCFLAGS := -I$(BASE_INC_PATH) $(REF_KER_I_PATHS) # If CBLAS is enabled, we also include the path to the cblas.h directory so # that the compiler will be able to find cblas.h as the CBLAS source code is # being compiled. ifeq ($(MK_ENABLE_CBLAS),yes) CINCFLAGS += -I$(CBLAS_H_DIRPATH) endif # Obtain a list of header paths in the configured addons. Then add -I to each # header path. CADDONINCFLAGS := $(strip $(patsubst %, -I%, $(ADDON_HDR_DIRPATHS))) # Obtain a list of header paths in the configured sandbox. Then add -I to each # header path. CSANDINCFLAGS := $(strip $(patsubst %, -I%, $(SANDBOX_HDR_DIRPATHS))) # # --- BLIS configuration header definitions ------------------------------------ # # These files were created by configure, but we need to define them here so we # can remove them as part of the clean targets. BLIS_ADDON_H := ./bli_addon.h BLIS_CONFIG_H := ./bli_config.h # # --- Special preprocessor macro definitions ----------------------------------- # # Define a C preprocessor flag that is *only* defined when BLIS is being # compiled. (In other words, an application that #includes blis.h will not # get this cpp macro.) BUILD_CPPFLAGS := -DBLIS_IS_BUILDING_LIBRARY # end of ifndef COMMON_MK_INCLUDED conditional block endif blis-1.1/config/000077500000000000000000000000001474157777200135765ustar00rootroot00000000000000blis-1.1/config/README.md000066400000000000000000000014671474157777200150650ustar00rootroot00000000000000 For more information on sub-configurations and configuration families in BLIS, please read the Configuration Guide, which can be viewed in markdown-rendered form [from the BLIS wiki page](https://github.com/flame/blis/wiki/). If you don't have time, or are impatient, take a look at the `config_registry` file in the top-level directory of the BLIS distribution. It contains a grammar-like mapping of configuration names, or families, to sub-configurations, which may be other families. Keep in mind that the `/` notation: ``` : / ``` means that the kernel set associated with `` should be made available to the configuration `` if `` is targeted at configure-time. (Some configurations borrow kernels from other configurations, and this is how we specify that requirement.) blis-1.1/config/a64fx/000077500000000000000000000000001474157777200145265ustar00rootroot00000000000000blis-1.1/config/a64fx/bli_a64fx_sector_cache.h000066400000000000000000000104571474157777200211660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Forschunszentrum Juelich Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // A64FX: set up cache sizes // // Reference: A64FX (TM) specification Fujitsu HPC Extension // Link: https://github.com/fujitsu/A64FX/blob/master/doc/A64FX_Specification_HPC_Extension_v1_EN.pdf // // 63:15 | 14:12 | 11 | 10:08 | 07 | 06:04 | 03 | 02:00 | // RES0 | l1_sec3_max | RES0 | l1_sec2_max | RES0 | l1_sec1_max | RES0 | l1_sec0_max | // // the bits set number of maximum sectors from 0-7 // 000 - 0 // 001 - 1 // 010 - 2 // 011 - 3 // 100 - 4 // 101 - 5 // 110 - 6 // 111 - 7 // // For L1 we want to maximize the number of sectors for B // Configuration 1: 1 sector for C (sector 3) // 1 sector for A (sector 1) // 6 sectors for B (sector 2) // 0 sectors for the rest (sector 0) // // 16b bitfield conf. 1: 0b0 001 0 110 0 001 0 000 // // Configuration 2: 1 sector for C (sector 3) // 1 sector for A (sector 1) // 5 sectors for B (sector 2) // 1 sectors for the rest (sector 0) // // 16b bitfield conf. 2: 0b0 001 0 101 0 001 0 001 // // accessing the control register: // // MRS , S3_3_C11_C8_2 // MSR S3_3_C11_C8_2, // // TODO: First tests showed no change in performance, a deeper investigation // is necessary #define A64FX_SETUP_SECTOR_CACHE_SIZES(config_bitfield)\ {\ uint64_t sector_cache_config = config_bitfield;\ __asm__ volatile(\ "msr s3_3_c11_c8_2,%[sector_cache_config]"\ :\ : [sector_cache_config] "r" (sector_cache_config)\ :\ );\ } #define A64FX_SETUP_SECTOR_CACHE_SIZES_L2(config_bitfield)\ {\ uint64_t sector_cache_config = config_bitfield;\ __asm__ volatile(\ "msr s3_3_c15_c8_2,%[sector_cache_config]"\ :\ : [sector_cache_config] "r" (sector_cache_config)\ :\ );\ } #define A64FX_SET_CACHE_SECTOR(areg, tag, sparereg)\ " mov "#sparereg", "#tag" \n\t"\ " lsl "#sparereg", "#sparereg", 56 \n\t"\ " orr "#areg", "#areg", "#sparereg" \n\t" #define A64FX_READ_SECTOR_CACHE_SIZES(output_uint64)\ __asm__ volatile(\ "mrs %["#output_uint64"],s3_3_c11_c8_2"\ : [output_uint64] "=r" (output_uint64)\ : \ :\ ); #define A64FX_SCC(sec0,sec1,sec2,sec3)\ (uint64_t)((sec0 & 0x7LU) | ((sec1 & 0x7LU) << 4) | ((sec2 & 0x7LU) << 8) | ((sec3 & 0x7LU) << 12)) #define A64FX_SCC_L2(sec02,sec13)\ (uint64_t)((sec02 & 0x1FLU) | ((sec13 & 0x1FLU) << 8)) blis-1.1/config/a64fx/bli_cntx_init_a64fx.c000066400000000000000000000077271474157777200205440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "bli_a64fx_sector_cache.h" void bli_cntx_init_a64fx( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_a64fx_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armsve_asm_2vx10_unindexed, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armsve_asm_2vx10_unindexed, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_armsve_asm_2vx10_unindexed, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_armsve_asm_2vx10_unindexed, // packm BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_16xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_10xk, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 16, 16, 8 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 10, 10, 10, 10 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 256, 128, 192, 96 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 2048, 2048, 1536, 1536 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 23040, 26880, 11520, 11760 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); // Set A64FX cache sector sizes for each PE/CMG // SC Fugaku might disable users' setting cache sizes. #if !defined(CACHE_SECTOR_SIZE_READONLY) #pragma omp parallel { A64FX_SETUP_SECTOR_CACHE_SIZES(A64FX_SCC(0,1,3,0)) A64FX_SETUP_SECTOR_CACHE_SIZES_L2(A64FX_SCC_L2(9,28)) } #endif } blis-1.1/config/a64fx/bli_family_a64fx.h000066400000000000000000000042031474157777200200150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 256 #define BLIS_SIMD_MAX_NUM_REGISTERS 32 // SVE-specific configs. #define N_L1_SVE_DEFAULT 64 #define W_L1_SVE_DEFAULT 4 #define C_L1_SVE_DEFAULT 256 #define N_L2_SVE_DEFAULT 2048 #define W_L2_SVE_DEFAULT 16 #define C_L2_SVE_DEFAULT 256 #define N_L3_SVE_DEFAULT 8192 #define W_L3_SVE_DEFAULT 16 #define C_L3_SVE_DEFAULT 256 //#endif blis-1.1/config/a64fx/bli_kernel_defs_a64fx.h000066400000000000000000000037311474157777200210220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 32 #define BLIS_MR_d 16 #define BLIS_MR_c 16 #define BLIS_MR_z 8 #define BLIS_NR_s 10 #define BLIS_NR_d 10 #define BLIS_NR_c 10 #define BLIS_NR_z 10 //#endif blis-1.1/config/a64fx/make_defs.mk000066400000000000000000000055601474157777200170030ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := a64fx #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE -D_A64FX CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -ftree-vectorize -march=armv8-a+sve endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) CKVECFLAGS := # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/altra/000077500000000000000000000000001474157777200147015ustar00rootroot00000000000000blis-1.1/config/altra/QuickStart/000077500000000000000000000000001474157777200167735ustar00rootroot00000000000000blis-1.1/config/altra/QuickStart/TimeDGEMM.cfile000077500000000000000000000075321474157777200214610ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "blis.h" /*################################################### // To build with openmp: // Note: Don't need the -lomp on Linux gcc -fopenmp -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x // To build with pThreads source ./enable_blis.sh gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x // To run with QuickStart Macros... for N_CORES, S_SOCKETS blis_set_cores_and_sockets N S; $BLIS_NUMA time_gemm.x ###################################################*/ #include // for Linux stdarg //################################################### // Handy blis functions //################################################### // Returns 0.0 if out ofmatrix double GetReal(obj_t *m, int row, int col) { double im = 0, re = 0; // Imaginary component if (!m) return 0.0; bli_getijm(row, col, m, &re, &im); return re; } bool SetReal(obj_t *m, int row, int col, double dVal) { if (!m) return 0.0; bli_setijm(dVal, 0.0, row, col, m); return true; } //################################################### // The basic meat - a one shot //################################################### bool TimeBlis(long size) { int repeat = 3; // Best Of! double dAlpha = 1.0, dBeta = 0.0; // simplest case! //============== Allocate matrices ============= obj_t* alpha = (obj_t*) calloc(1, sizeof(obj_t)); obj_t* beta = (obj_t*) calloc(1, sizeof(obj_t)); bli_obj_create(BLIS_DOUBLE, 1, 1, 0, 0, alpha); bli_obj_create(BLIS_DOUBLE, 1, 1, 0, 0, beta); // Full gemm is alpha * A * B + beta * C bli_setsc(dAlpha, 0.0, alpha); // alpha is one bli_setsc(dBeta, 0.0, beta); // beta is zero //============================================== printf("Initializing %g GB of Matrices...\n", 8.0 * size * size * 3.0 / 1024.0 / 1024.0 / 1024.0); obj_t* a = (obj_t*) calloc(1, sizeof(obj_t)); obj_t* b = (obj_t*) calloc(1, sizeof(obj_t)); obj_t* c = (obj_t*) calloc(1, sizeof(obj_t)); bli_obj_create(BLIS_DOUBLE, size, size, size, 1, c); bli_obj_create(BLIS_DOUBLE, size, size, size, 1, a); bli_obj_create(BLIS_DOUBLE, size, size, size, 1, b); // Create Random matrices // that are well conditioned and invertible // (Note: this can be slow) // bli_randm(c); bli_randm(a); bli_randm(b); //============================================== // DO the timing, blis style... //============================================== double dBestTime = DBL_MAX; for (int i = 0; i < repeat; i++) { printf("Performing DGEMM %d of %d\n", i + 1, repeat); fflush(stdout); double dStartTime = bli_clock(); bli_gemm(alpha, a, b, beta, c); // Always look at best of N for timing! dBestTime = bli_clock_min_diff( dBestTime, dStartTime ); } double gflops = ( 2.0 * size * size * size ) / ( dBestTime * 1.0e9 ); printf("Best DGEMM run completed in %g seconds @ size= \t %ld \t %g \t gigaflops\n", dBestTime, size, gflops); fflush(stdout); return true; } int main( int argc, char** argv ) { long size = 0; int cores = 1, sweep_inc = 0; printf("Details of parallelism are set by environment variables.\n"); printf("Arg1 = size=M=N=K for DGEMM\n" "optional arg2 = size step for sweep.\n"); if (argc < 2) return 0; if (argc > 1) { size = atol(argv[1]); printf("User set size to %ld\n", size); } if (argc > 2) { sweep_inc = atoi(argv[3]); printf("User set sweep size inc to %d\n", sweep_inc); } if (sweep_inc == 0) TimeBlis(size); else { for (int i = size; i >= sweep_inc; i -= sweep_inc) TimeBlis(i); } return 0; } blis-1.1/config/altra/QuickStart/blis_build_altra.sh000077500000000000000000000013641474157777200226310ustar00rootroot00000000000000#!/bin/bash echo "#######################################################" echo "Building standard OpenMP BLIS..." echo "#######################################################" . ./blis_setenv.sh quiet echo "##########################################################" echo "Configuring BLIS for Altra using OpenMP for parallelism..." echo "##########################################################" . ./blis_configure_altra.sh quiet echo "Switching to directory $BLIS_HOME" pushd $BLIS_HOME > /dev/null make -j popd > /dev/null if [ "$1" != "notest" ]; then . ./blis_test.sh quiet fi . ./blis_setenv.sh echo "##########################################################" echo "...done" echo "##########################################################" blis-1.1/config/altra/QuickStart/blis_build_altra_pthreads.sh000077500000000000000000000014031474157777200245150ustar00rootroot00000000000000#!/bin/bash echo "#######################################################" echo "Building pThreads version of BLIS..." echo "#######################################################" . ./blis_setenv.sh quiet echo "##########################################################" echo "Configuring BLIS for Altra using pThreads for parallelism..." echo "##########################################################" . ./blis_configure_altra_pthreads.sh quiet echo "Switching to directory $BLIS_HOME" pushd $BLIS_HOME > /dev/null make -j popd > /dev/null if [ "$1" != "notest" ]; then . ./blis_test.sh quiet fi . ./blis_setenv.sh echo "##########################################################" echo "...done" echo "##########################################################" blis-1.1/config/altra/QuickStart/blis_build_both_libraries.sh000077500000000000000000000041221474157777200245110ustar00rootroot00000000000000#!/bin/bash echo "##########################################################" echo "Creating both OpenMP and pThread BLIS libraries..." echo "##########################################################" echo "First, Creating pThread library..." echo "##########################################################" . ./blis_build_altra_pthreads.sh notest echo "##########################################################" echo "Saving the pThreads build..." echo "##########################################################" # Temporarily move the pthreads build mkdir $BLIS_HOME/.tempinc mkdir $BLIS_HOME/.templib mv $BLIS_INC/* $BLIS_HOME/.tempinc/ mv $BLIS_LIB/* $BLIS_HOME/.templib/ # And rename the pthread versions of the include and library files #echo "##########################################################" pushd $BLIS_HOME/.tempinc/ > /dev/null echo "Renaming pThread-enabled blis.h -> blisP.h" mv blis.h blisP.h popd > /dev/null pushd $BLIS_HOME/.templib/ > /dev/null for f in $(ls -1); do destf=${f/blis/blisP} echo "Renaming pThread library $f -> $destf" mv "$f" "$destf" # Fix the symbolic links if [[ -L "$destf" ]]; then target=$(readlink $destf) target=${target/blis/blisP} \rm "$destf" ln -s "$target" "$destf" fi done popd > /dev/null echo "##########################################################" echo "##########################################################" echo "Second, Creating OpenMP library..." echo "##########################################################" . ./blis_build_altra.sh notest echo "##########################################################" echo "Restoring the pThreads build..." echo "##########################################################" # And move the pthread versions back mv $BLIS_HOME/.tempinc/* $BLIS_INC/ mv $BLIS_HOME/.templib/* $BLIS_LIB/ rmdir $BLIS_HOME/.tempinc rmdir $BLIS_HOME/.templib . ./blis_test.sh quiet . ./blis_setenv.sh echo "##########################################################" echo "Done creating BLIS libraries..." echo "##########################################################" blis-1.1/config/altra/QuickStart/blis_configure_altra.sh000077500000000000000000000007661474157777200235200ustar00rootroot00000000000000#!/bin/bash if [ "$1" = "quiet" ]; then quiet_confopenmp="quiet" else quiet_confopenmp="" fi if [ "$quiet_confopenmp" = "" ]; then echo "##########################################################" echo "Configuring BLIS for Altra using OpenMP for parallelism..." echo "##########################################################" fi . ./blis_setenv.sh $quiet_confopenmp pushd $BLIS_HOME > /dev/null make distclean ./configure -t openmp --disable-pba-pools altra popd > /dev/null blis-1.1/config/altra/QuickStart/blis_configure_altra_pthreads.sh000077500000000000000000000010021474157777200253720ustar00rootroot00000000000000#!/bin/bash if [ "$1" = "quiet" ]; then quiet_confpthreads="quiet" else quiet_confpthreads="" fi if [ "$quiet_confpthreads" = "" ]; then echo "##########################################################" echo "Configuring BLIS for Altra using pThreads for parallelism..." echo "##########################################################" fi . ./blis_setenv.sh $quiet_confpthreads pushd $BLIS_HOME > /dev/null make distclean ./configure -t pthreads --disable-pba-pools altra popd > /dev/null blis-1.1/config/altra/QuickStart/blis_quick_start_altra.txt000077500000000000000000000170641474157777200242740ustar00rootroot00000000000000Welcome to the Altra Platform! We've made some scripts to help you build and use blis, but feel free to look at them for your own inspiration. Note that all the provided scripts must be SOURCED, NOT executed! This is because they set up environment variables needed for the steps below. Using BLIS requires a few steps: 1) Configuring the library 2) Building the library & validating it 3) Linking your program with BLIS 4) Setting the environment parameters for an optimized blis to run your program Let's briefly touch on these points, and how the scripts provided can help But first, let's make sure your configuration is correct... Open blis_setenv.sh In the Platform Specific: section, around line 50 or so, you will see: firmware=107 or firmware=108 If your firmware is version 1.08 or greater, make sure this is set to 108, else make sure it's set to 107. Ampere entirely changed the CoreID mappings between these versions. The Altra Platform updated their firmware to 1.08 in May 2021, so if your firmware was updated later than that, odds are good that you have 2.04 or later. Note: the scripts referenced here modify environment variables, so they must be sourced. E.g., with source or . =================================================== 1) Configuring the library 2) Building the library & validating it =================================================== There are custom configuration options for Altra, but, as a user, your main decision is whether you want BLIS to use OpenMP or pthreads for parallelism? OpenMP is the default option, since OpenMP allows thread pinning and thus results in better performance. To build with OpenMP use: . ./blis_build_altra.sh However, some platforms (like MacOS) cannot use OpenMP at all. In this case, you want to build the pThreads version of BLIS: . ./blis_build_altra_pthreads.sh In both cases, it will create libblis.a in $BLIS_HOME/lib/$BLIS_ARCH Try doing that in the root blis directory, depending on your OS. LINUX: . ./blis_build_altra.sh MacOS Apple Silicon: . ./blis_build_altra_pthreads.sh ---------------------------------------------------------------------------- HOWEVER, there is a tricky case: If you link BLIS with a program that uses pThreads, you MUST use the pthreads version of BLIS, even though it will be slower. This is because there is a bug in which attempting to use both pthreads AND OpenMP will pin all threads to a single core and essentially freeze your program. If this is a possibility, you may want to have both libraries available and switch between them for each application. The script: . ./blis_build_both_libraries.sh will build both versions, with the pThreads version being called libblisP.a, and a second header blisP.h This is a little inconvenient, and we're working on improving the situation in the near future. ---------------------------------------------------------------------------- The build will additionally check the library, but if you would like to check a la carte, do . ./blis_test.sh You should see near the bottom: check-blastest.sh: All BLAS tests passed! check-blistest.sh: All BLIS tests passed! -------------------------------- Finally, here's a script that will be important when you are doing testing. This performs the important step of unsetting any parameters effecting blis parallelism. . ./blis_unset_par.sh =================================================== 3) Building and Linking your program with BLIS =================================================== This depends whether you are using the pThreads version of BLIS or the OpenMP version... Note this uses the BLIS locations automatically defined when sourcing blis_setenv.sh . ./blis_setenv.sh (This will display you environment variable settings, your blis libraries and headers (if built), and also unset blis parallelism parameters for safety.) // BUILDING your app with the OpenMP version of BLIS: // Note: Don't need -lomp on Linux gcc -fopenmp -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH MyFiles.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o MyExe // To build with pThreads gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH MyFiles.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o MyExe // NOTE: If you used the scripts to build BOTH versions of blis, then use the renamed blis lib: gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH MyFiles.c $BLIS_HOME/lib/$BLIS_ARCH/libblisP.a -lpthread -lm -o MyExe Let's try building a sample program that we've included to test BLIS: TimeDGEMM.c If this is a new terminal session, make sure to: . ./blis_setenv.sh (there's no harm in running it again.) Linux: gcc -fopenmp -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x Apple Silicon: gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x But don't try a timed run, yet - there's some runtime setup that needs to be done... =================================================== 4) Setting the environment parameters for an optimized blis to run your program =================================================== The performance of some BLAS libraries are very sensitive to the compiler or the page size. BLIS is not sensitive to either of these things, but it IS extremely dependent on pinning the right threads to the right cores. We have scripts to help... . ./blis_setenv.sh This not only tells you where blis is, but it also creates shell functions to set affinity, threading, and NUMA control for each run. There is a shell function created that you can call to set up how your threads will be pinned and used: blis_set_cores_and_sockets Specifying the number of sockets is important because BLIS is configured very differently for one vs two sockets. Example: # Set up for a run with 128 total cores, half on each of 2 sockets. blis_set_cores_and_sockets 128 2 You can also use the following aliases: blis_set_cores_1S 80 # Run 80 cores on 1 socket blis_set_cores_2S 160 # Run 160 cores across 2 sockets, 80 on each NOTE that at the moment, for multi-threaded BLIS, we only support active number of threads that are a multiple of 8. If you want to test single threaded performance, you can set export BLIS_NUM_THREADS=1 Launching your executable: If your application is MyExe, your commands to perform an optimized BLIS run might look like this: blis_set_cores_2S 160 $BLIS_NUMA MyExe This will set cpu affinity correctly, set BLIS parallelism correctly, set the NUMA mode correctly, and launch your EXE. --------------------------------------------------- Let's try an example using the executable that you created in section 3, remembering that if you're on an Apple Silicon Mac, make sure that you don't use more cores than you have. (For example, 8 on an M1 Max.) Apple Silicon: (No NUMA is needed for Apple Mac) blis_set_cores_1S 8; ./time_gemm.x 8000 (in tests, we obtained about 95% of peak with Neon64 - about 366 Gigaflops) Altra Dual Socket: blis_set_cores_2S 160; $BLIS_NUMA ./time_gemm.x 12000 (in tests, we obtained about 3.2 TF, or 82% of peak CONGRATULATIONS! You're ready to use BLIS! =================================================== Performance Note: =================================================== We continue to enhance BLIS performance on the Altra. One current issue is that not all variants of triangular operations obtain full performance. For TRSM, best performance is with left triangular operations. For TRMM, DUAL SOCKET, best performance is with left triangular operations. For TRMM, SINGLE SOCKET, best performance is with right triangular operations. blis-1.1/config/altra/QuickStart/blis_quick_start_uninstall_altra.sh000077500000000000000000000014721474157777200261540ustar00rootroot00000000000000#!/bin/bash # This utility will remove all the configuration # Specific QuickStart files from the blis directory. # This is very useful when switching configurations! # if [[ -n "$BLIS_HOME" ]]; then echo "REMOVING ALL ALTRA QUICKSTART FILES FROM $BLIS_HOME" rm $BLIS_HOME/blis_build_altra_pthreads.sh rm $BLIS_HOME/blis_build_altra.sh rm $BLIS_HOME/blis_build_both_libraries.sh rm $BLIS_HOME/blis_configure_altra_pthreads.sh rm $BLIS_HOME/blis_configure_altra.sh rm $BLIS_HOME/blis_quick_start_altra.txt rm $BLIS_HOME/blis_setenv.sh rm $BLIS_HOME/blis_unset_par.sh rm $BLIS_HOME/blis_test.sh rm $BLIS_HOME/TimeDGEMM.c rm $BLIS_HOME/time_gemm.x rm $BLIS_HOME/blis_quick_start_uninstall_altra.sh else echo "ONLY USE THIS SCRIPT FROM THE BLIS HOME DIRECTORY!" echo "BLIS_HOME is not set!" fi blis-1.1/config/altra/QuickStart/blis_setenv.sh000077500000000000000000000211651474157777200216540ustar00rootroot00000000000000#!/bin/bash ####################################################################### # Brought to you by Oracle Labs ####################################################################### # Tested in bash and zsh ####################################################################### # Sets up all the environment variables needed for running blis. # For this reason, the script MUST be sourced, NOT executed! # Needs to be run from BLIS directory to have a portable definition of # BLIS_HOME. If this setup doesn't work for you, you may hard code # the path to BLIS_HOME, but then be careful if you copy or move it! ####################################################################### # This is the top level blis directory - it is recommended to set to an absolulte path # Can be overridden by user to be called anywhere, but then less portable # export BLIS_HOME=. # PORTABLE - Set BLIS_HOME to the blis directory containing this script # We need to get the full path to the file in case this is called from another directory if [ "$1" = "quiet" ]; then quiet_setenv="quiet" else quiet_setenv="" fi if [[ -n "$BASH_VERSION" ]] ; then file_path_and_name="$( dirname "${BASH_SOURCE[0]}" )/blis_set_home_dir.sh" else file_path_and_name="$( dirname "$0" )/blis_set_home_dir.sh" fi if [ -f "$file_path_and_name" ] ; then . $file_path_and_name quiet else echo "ERROR - this file is not being executed from a blis home directory." echo "If you cannot use this script in a home directory, you can hardcode" echo "the absolute location of BLIS_HOME in blis_setenv,bash, but this" echo "is then less portable and more error prone with multiple blis" echo "directories." return fi ####################################################################### # Platform Specific: ####################################################################### # Important! Set the firmware number to 107 for firmware version 1.07 or earlier, # and 108 for 1.08 or later. We were unable to test 1.08 at this time. # firmware=108 qualifier="or later" if (( firmware == 107 )); then qualifier="or earlier" fi # Use altra for both single and double socket - this might change export BLIS_ARCH="altra" export BLIS_LIB=$BLIS_HOME/lib/$BLIS_ARCH export BLIS_INC=$BLIS_HOME/include/$BLIS_ARCH # Verify: if [ "$quiet_setenv" = "" ]; then echo "#################################################################" echo "CoreID affinity assumes firmware version on this machine is $firmware $qualifier" echo "BLIS_HOME set to $BLIS_HOME" echo "BLIS_INC set to $BLIS_INC" echo "=================================================================" ls -l $BLIS_INC echo "-----------------------------------------------------------------" echo "BLIS_LIB set to $BLIS_LIB" echo "-----------------------------------------------------------------" ls -l $BLIS_LIB echo "#################################################################" fi # Affinity Macros, etc export BLIS_NUMA="numactl --localalloc" # Use with firmware versions 1.07 and earlier. export BLIS_AFFINITY_2S_1_07="0 40 20 60 4 44 24 64 8 48 28 68 12 52 32 72 2 42 22 62 6 46 26 66 10 50 30 70 14 54 34 74 1 41 21 61 5 45 25 65 9 49 29 69 13 53 33 73 3 43 23 63 7 47 27 67 11 51 31 71 15 55 35 75 16 56 36 76 18 58 38 78 17 57 37 77 19 59 39 79 80 120 100 140 84 124 104 144 88 128 108 148 92 132 112 152 82 122 102 142 86 126 106 146 90 130 110 150 94 134 114 154 81 121 101 141 85 125 105 145 89 129 109 149 93 133 113 153 83 123 103 143 87 127 107 147 91 131 111 151 95 135 115 155 96 136 116 156 98 138 118 158 97 137 117 157 99 139 119 159" export BLIS_AFFINITY_1S_1_07="0 40 20 60 4 44 24 64 8 48 28 68 12 52 32 72 2 42 22 62 6 46 26 66 10 50 30 70 14 54 34 74 1 41 21 61 5 45 25 65 9 49 29 69 13 53 33 73 3 43 23 63 7 47 27 67 11 51 31 71 15 55 35 75 16 56 36 76 18 58 38 78 17 57 37 77 19 59 39 79" # Use with firmware versions 1.08+ # Warning - this has not been tested. # export BLIS_AFFINITY_2S_1_08="28, 29, 38, 39, 2, 3, 12, 13, 6, 7, 16, 17, 0, 1, 10, 11, 68, 69, 78, 79, 42, 43, 52, 53, 46, 47, 56, 57, 40, 41, 50, 51, 24, 25, 34, 35, 20, 21, 30, 31, 26, 27, 36, 37, 22, 23, 32, 33, 64, 65, 74, 75, 60, 61, 70, 71, 66, 67, 76, 77, 62, 63, 72, 73, 8, 9, 18, 19, 4, 5, 14, 15, 48, 49, 58, 59, 44, 45, 54, 55, 108, 109, 118, 119, 82, 83, 92, 93, 86, 87, 96, 97, 80, 81, 90, 91, 148, 149, 158, 159, 122, 123, 132, 133, 126, 127, 136, 137, 120, 121, 130, 131, 104, 105, 114, 115, 100, 101, 110, 111, 106, 107, 116, 117, 102, 103, 112, 113, 144, 145, 154, 155, 140, 141, 150, 151, 146, 147, 156, 157, 142, 143, 152, 153, 88, 89, 98, 99, 84, 85, 94, 95, 128, 129, 138, 139, 124, 125, 134, 135" export BLIS_AFFINITY_1S_1_08="28, 29, 38, 39, 2, 3, 12, 13, 6, 7, 16, 17, 0, 1, 10, 11, 68, 69, 78, 79, 42, 43, 52, 53, 46, 47, 56, 57, 40, 41, 50, 51, 24, 25, 34, 35, 20, 21, 30, 31, 26, 27, 36, 37, 22, 23, 32, 33, 64, 65, 74, 75, 60, 61, 70, 71, 66, 67, 76, 77, 62, 63, 72, 73, 8, 9, 18, 19, 4, 5, 14, 15, 48, 49, 58, 59, 44, 45, 54, 55" # Parallelism on the Altra is very flat: # Set JC to number of sockets: export BLIS_JC_NT=2 # Set JR to groups of 8: export BLIS_HR_NT=8 # Set IC to the number of cores per socket / 8: export BLIS_IC_NT=10 # Experimental: Allow you to set threading and # Core affinity on single or dual sockets for # N threads. Currently, we only support N as # a multple of 8 # Max Altra cores per socket CPS=80 # Use Bash Arrays: # Choose which CoreID mapping to go with based on the firmware ID if (($firmware == 107)); then arrayCoreIDs=(0 40 20 60 4 44 24 64 8 48 28 68 12 52 32 72 2 42 22 62 6 46 26 66 10 50 30 70 14 54 34 74 1 41 21 61 5 45 25 65 9 49 29 69 13 53 33 73 3 43 23 63 7 47 27 67 11 51 31 71 15 55 35 75 16 56 36 76 18 58 38 78 17 57 37 77 19 59 39 79 80 120 100 140 84 124 104 144 88 128 108 148 92 132 112 152 82 122 102 142 86 126 106 146 90 130 110 150 94 134 114 154 81 121 101 141 85 125 105 145 89 129 109 149 93 133 113 153 83 123 103 143 87 127 107 147 91 131 111 151 95 135 115 155 96 136 116 156 98 138 118 158 97 137 117 157 99 139 119 159) elif (($firmware == 108)); then arrayCoreIDs=(28 29 38 39 2 3 12 13 6 7 16 17 0 1 10 11 68 69 78 79 42 43 52 53 46 47 56 57 40 41 50 51 24 25 34 35 20 21 30 31 26 27 36 37 22 23 32 33 64 65 74 75 60 61 70 71 66 67 76 77 62 63 72 73 8 9 18 19 4 5 14 15 48 49 58 59 44 45 54 55 108 109 118 119 82 83 92 93 86 87 96 97 80 81 90 91 148 149 158 159 122 123 132 133 126 127 136 137 120 121 130 131 104 105 114 115 100 101 110 111 106 107 116 117 102 103 112 113 144 145 154 155 140 141 150 151 146 147 156 157 142 143 152 153 88 89 98 99 84 85 94 95 128 129 138 139 124 125 134 135) else echo "ERROR - UNSUPPORTED FIRMWARE $firmware" exit -1 fi # Brief check: @ = list all numbers, loop for i in ${}; do ... done # for Array Size, do ${#arr[@]} # echo "CoreID array has ${#arrayCoreIDs[@]} elements" # echo "CoreID array set to: ${arrayCoreIDs[@]}" # Give the TOTAL core count: # Single socket runs blis_set_cores_and_sockets() { cores=$1 sockets=$2 # echo "Cores = $cores, sockets=$sockets" # Round up to nearest 8 cores per socket: cores_per_group=8 if (( $sockets == 2 )); then cores_per_group=16; fi core_round_inc=$(($cores_per_group-1)) cores_per_socket=$(($cores)) cores=$(($cores + $core_round_inc)) groups_per_socket=$(($cores / $cores_per_group)) rounded_cores=$(( $groups_per_socket * $cores_per_group )) # echo "Rounded Cores = $rounded_cores" # echo "Groups Per Socket = $groups_per_socket" # set the parallelism for one socket with N cores: # Set JC to number of sockets: export BLIS_JC_NT=$sockets # Set JR to groups of 8: export BLIS_JR_NT=8 # Set IC to the number of cores per socket / 8: export BLIS_IC_NT=$groups_per_socket # Using an old version of zsh syntax that's compatible with bash if (( $sockets == 1 )); then # Simple single socket case # quotes # export GOMP_CPU_AFFINITY="\"${arrayCoreIDs[@]:0:$rounded_cores}\"" # No quotes... export GOMP_CPU_AFFINITY="${arrayCoreIDs[@]:0:$rounded_cores}" else # Dual socket case half_cores=$(( $rounded_cores / 2 )) # echo "Half cores are $half_cores" # quotes # export GOMP_CPU_AFFINITY="\"${arrayCoreIDs[@]:0:$half_cores} ${arrayCoreIDs[@]:$CPS:$half_cores}\"" # No quotes export GOMP_CPU_AFFINITY="${arrayCoreIDs[@]:0:$half_cores} ${arrayCoreIDs[@]:$CPS:$half_cores}" fi echo "Activating $rounded_cores cores across $sockets sockets..." echo "GOMP_CPU_AFFINITY set to $GOMP_CPU_AFFINITY" echo "JC/IC/JR = $BLIS_JC_NT/$BLIS_IC_NT/$BLIS_JR_NT" } # Convenience functions: blis_set_cores_1S() { blis_set_cores_and_sockets $1 1 ; } blis_set_cores_2S() { blis_set_cores_and_sockets $1 2 ; } # For safety: . ./blis_unset_par.sh blis-1.1/config/altra/QuickStart/blis_test.sh000077500000000000000000000011301474157777200213150ustar00rootroot00000000000000#!/bin/bash if [ "$1" = "quiet" ]; then quiet_blistest="quiet" else quiet_blistest="" fi # We don't want to quiet this part: echo "#################################################################" echo "Simple testing of BLIS - use testsuite for more extensive tests." echo "#################################################################" . ./blis_setenv.sh $quiet_blistest # It's critical to unset parallelism parameters before # running the test code! . ./blis_unset_par.sh quiet echo "Switching to directory $BLIS_HOME" pushd $BLIS_HOME > /dev/null make check -j popd > /dev/null blis-1.1/config/altra/QuickStart/blis_unset_par.sh000077500000000000000000000007541474157777200223510ustar00rootroot00000000000000#!/bin/blis if [ "$1" = "quiet" ]; then quiet_unsetpar="quiet" else quiet_unsetpar="" fi if [ "$quiet_unsetpar" = "" ]; then echo "#########################################################" echo " UNSETTING BLIS ENVIRONMENT VARIABLES THAT SET THREADING" echo " AND AFFINITY." echo "#########################################################" fi unset BLIS_JC_NT unset BLIS_JR_NT unset BLIS_IC_NT unset BLIS_NUM_THREADS unset OMP_NUM_THREADS unset GOMP_CPU_AFFINITY blis-1.1/config/altra/bli_cntx_init_altra.c000066400000000000000000000066551474157777200210710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022, Oracle Labs, Oracle Corporation Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_altra( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_altra_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 192, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 480, -1, -1 ); // Changed d to 480 - LDR // bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 6144, -1, -1 ); // Doubled NC bli_blksz_init_easy( &blkszs[ BLIS_NC ], 12288, 8192, -1, -1 ); // Increased NC slightly more // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/altra/bli_family_altra.h000066400000000000000000000043411474157777200203460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // Version with 16 byte alignment and jr=8 #define BLIS_THREAD_MAX_JR 8 // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #define BLIS_FORCE_ROLL_PACKM_REF_KERNEL // Temporary microtile of for each supported datatype: // - s: 8 * 12 * sizeof(float) // - d: 6 * 8 * sizeof(double) // Thus, 384 bytes should be sufficient. #define BLIS_STACK_BUF_MAX_SIZE 384 // Empirical best choices for TRMM #define BLIS_DISABLE_TRMM_RIGHT_IF_JC_GT_1_ELSE_DISABLE_LEFT_IF_DP blis-1.1/config/altra/bli_kernel_defs_altra.h000066400000000000000000000035731474157777200213540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 6 #define BLIS_NR_s 12 #define BLIS_NR_d 8 //#endif blis-1.1/config/altra/make_defs.mk000066400000000000000000000056761474157777200171660ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := altra #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -mcpu=neoverse-n1 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=neoverse-n1 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mcpu=neoverse-n1 else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/altramax/000077500000000000000000000000001474157777200154075ustar00rootroot00000000000000blis-1.1/config/altramax/QuickStart/000077500000000000000000000000001474157777200175015ustar00rootroot00000000000000blis-1.1/config/altramax/QuickStart/TimeDGEMM.cfile000077500000000000000000000075321474157777200221670ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "blis.h" /*################################################### // To build with openmp: // Note: Don't need the -lomp on Linux gcc -fopenmp -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x // To build with pThreads source ./enable_blis.sh gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x // To run with QuickStart Macros... for N_CORES, S_SOCKETS blis_set_cores_and_sockets N S; $BLIS_NUMA time_gemm.x ###################################################*/ #include // for Linux stdarg //################################################### // Handy blis functions //################################################### // Returns 0.0 if out ofmatrix double GetReal(obj_t *m, int row, int col) { double im = 0, re = 0; // Imaginary component if (!m) return 0.0; bli_getijm(row, col, m, &re, &im); return re; } bool SetReal(obj_t *m, int row, int col, double dVal) { if (!m) return 0.0; bli_setijm(dVal, 0.0, row, col, m); return true; } //################################################### // The basic meat - a one shot //################################################### bool TimeBlis(long size) { int repeat = 3; // Best Of! double dAlpha = 1.0, dBeta = 0.0; // simplest case! //============== Allocate matrices ============= obj_t* alpha = (obj_t*) calloc(1, sizeof(obj_t)); obj_t* beta = (obj_t*) calloc(1, sizeof(obj_t)); bli_obj_create(BLIS_DOUBLE, 1, 1, 0, 0, alpha); bli_obj_create(BLIS_DOUBLE, 1, 1, 0, 0, beta); // Full gemm is alpha * A * B + beta * C bli_setsc(dAlpha, 0.0, alpha); // alpha is one bli_setsc(dBeta, 0.0, beta); // beta is zero //============================================== printf("Initializing %g GB of Matrices...\n", 8.0 * size * size * 3.0 / 1024.0 / 1024.0 / 1024.0); obj_t* a = (obj_t*) calloc(1, sizeof(obj_t)); obj_t* b = (obj_t*) calloc(1, sizeof(obj_t)); obj_t* c = (obj_t*) calloc(1, sizeof(obj_t)); bli_obj_create(BLIS_DOUBLE, size, size, size, 1, c); bli_obj_create(BLIS_DOUBLE, size, size, size, 1, a); bli_obj_create(BLIS_DOUBLE, size, size, size, 1, b); // Create Random matrices // that are well conditioned and invertible // (Note: this can be slow) // bli_randm(c); bli_randm(a); bli_randm(b); //============================================== // DO the timing, blis style... //============================================== double dBestTime = DBL_MAX; for (int i = 0; i < repeat; i++) { printf("Performing DGEMM %d of %d\n", i + 1, repeat); fflush(stdout); double dStartTime = bli_clock(); bli_gemm(alpha, a, b, beta, c); // Always look at best of N for timing! dBestTime = bli_clock_min_diff( dBestTime, dStartTime ); } double gflops = ( 2.0 * size * size * size ) / ( dBestTime * 1.0e9 ); printf("Best DGEMM run completed in %g seconds @ size= \t %ld \t %g \t gigaflops\n", dBestTime, size, gflops); fflush(stdout); return true; } int main( int argc, char** argv ) { long size = 0; int cores = 1, sweep_inc = 0; printf("Details of parallelism are set by environment variables.\n"); printf("Arg1 = size=M=N=K for DGEMM\n" "optional arg2 = size step for sweep.\n"); if (argc < 2) return 0; if (argc > 1) { size = atol(argv[1]); printf("User set size to %ld\n", size); } if (argc > 2) { sweep_inc = atoi(argv[3]); printf("User set sweep size inc to %d\n", sweep_inc); } if (sweep_inc == 0) TimeBlis(size); else { for (int i = size; i >= sweep_inc; i -= sweep_inc) TimeBlis(i); } return 0; } blis-1.1/config/altramax/QuickStart/blis_build_altramax.sh000077500000000000000000000014001474157777200240340ustar00rootroot00000000000000#!/bin/bash echo "#######################################################" echo "Building standard OpenMP BLIS..." echo "#######################################################" . ./blis_setenv.sh quiet echo "#############################################################" echo "Configuring BLIS for Altramax using OpenMP for parallelism..." echo "#############################################################" . ./blis_configure_altramax.sh quiet echo "Switching to directory $BLIS_HOME" pushd $BLIS_HOME > /dev/null make -j popd > /dev/null if [ "$1" != "notest" ]; then . ./blis_test.sh quiet fi . ./blis_setenv.sh echo "##########################################################" echo "...done" echo "##########################################################" blis-1.1/config/altramax/QuickStart/blis_build_altramax_pthreads.sh000077500000000000000000000014231474157777200257330ustar00rootroot00000000000000#!/bin/bash echo "#######################################################" echo "Building pThreads version of BLIS..." echo "#######################################################" . ./blis_setenv.sh quiet echo "###############################################################" echo "Configuring BLIS for Altramax using pThreads for parallelism..." echo "###############################################################" . ./blis_configure_altramax_pthreads.sh quiet echo "Switching to directory $BLIS_HOME" pushd $BLIS_HOME > /dev/null make -j popd > /dev/null if [ "$1" != "notest" ]; then . ./blis_test.sh quiet fi . ./blis_setenv.sh echo "##########################################################" echo "...done" echo "##########################################################" blis-1.1/config/altramax/QuickStart/blis_build_both_libraries.sh000077500000000000000000000041301474157777200252160ustar00rootroot00000000000000#!/bin/bash echo "##########################################################" echo "Creating both OpenMP and pThread BLIS libraries..." echo "##########################################################" echo "First, Creating pThread library..." echo "##########################################################" . ./blis_build_altramax_pthreads.sh notest echo "##########################################################" echo "Saving the pThreads build..." echo "##########################################################" # Temporarily move the pthreads build mkdir $BLIS_HOME/.tempinc mkdir $BLIS_HOME/.templib mv $BLIS_INC/* $BLIS_HOME/.tempinc/ mv $BLIS_LIB/* $BLIS_HOME/.templib/ # And rename the pthread versions of the include and library files #echo "##########################################################" pushd $BLIS_HOME/.tempinc/ > /dev/null echo "Renaming pThread-enabled blis.h -> blisP.h" mv blis.h blisP.h popd > /dev/null pushd $BLIS_HOME/.templib/ > /dev/null for f in $(ls -1); do destf=${f/blis/blisP} echo "Renaming pThread library $f -> $destf" mv "$f" "$destf" # Fix the symbolic links if [[ -L "$destf" ]]; then target=$(readlink $destf) target=${target/blis/blisP} \rm "$destf" ln -s "$target" "$destf" fi done popd > /dev/null echo "##########################################################" echo "##########################################################" echo "Second, Creating OpenMP library..." echo "##########################################################" . ./blis_build_altramax.sh notest echo "##########################################################" echo "Restoring the pThreads build..." echo "##########################################################" # And move the pthread versions back mv $BLIS_HOME/.tempinc/* $BLIS_INC/ mv $BLIS_HOME/.templib/* $BLIS_LIB/ rmdir $BLIS_HOME/.tempinc rmdir $BLIS_HOME/.templib . ./blis_test.sh quiet . ./blis_setenv.sh echo "##########################################################" echo "Done creating BLIS libraries..." echo "##########################################################" blis-1.1/config/altramax/QuickStart/blis_configure_altramax.sh000077500000000000000000000010021474157777200247140ustar00rootroot00000000000000#!/bin/bash if [ "$1" = "quiet" ]; then quiet_confopenmp="quiet" else quiet_confopenmp="" fi if [ "$quiet_confopenmp" = "" ]; then echo "#############################################################" echo "Configuring BLIS for Altramax using OpenMP for parallelism..." echo "#############################################################" fi . ./blis_setenv.sh $quiet_confopenmp pushd $BLIS_HOME > /dev/null make distclean ./configure -t openmp --disable-pba-pools altramax popd > /dev/null blis-1.1/config/altramax/QuickStart/blis_configure_altramax_pthreads.sh000077500000000000000000000010221474157777200266100ustar00rootroot00000000000000#!/bin/bash if [ "$1" = "quiet" ]; then quiet_confpthreads="quiet" else quiet_confpthreads="" fi if [ "$quiet_confpthreads" = "" ]; then echo "###############################################################" echo "Configuring BLIS for Altramax using pThreads for parallelism..." echo "###############################################################" fi . ./blis_setenv.sh $quiet_confpthreads pushd $BLIS_HOME > /dev/null make distclean ./configure -t pthreads --disable-pba-pools altramax popd > /dev/null blis-1.1/config/altramax/QuickStart/blis_quick_start_altramax.txt000077500000000000000000000167211474157777200255070ustar00rootroot00000000000000Welcome to the Altramax Platform! We've made some scripts to help you build and use blis, but feel free to look at them for your own inspiration. Note that all the provided scripts must be SOURCED, NOT executed! This is because they set up environment variables needed for the steps below. Using BLIS requires a few steps: 1) Configuring the library 2) Building the library & validating it 3) Linking your program with BLIS 4) Setting the environment parameters for an optimized blis to run your program Let's briefly touch on these points, and how the scripts provided can help But first, let's make sure your configuration is correct... Open blis_setenv.sh In the Platform Specific: section, around line 50 or so, you will see: firmware=205 or firmware=204 If your firmware is version 2.05 or greater (most likely), make sure this is set to 205, else make sure it's set to 204. Ampere changed the CoreID mappings between these versions around May 2022. Note: the scripts referenced here modify environment variables, so they must be sourced. E.g., with source or . =================================================== 1) Configuring the library 2) Building the library & validating it =================================================== There are custom configuration options for Altramax, but, as a user, your main decision is whether you want BLIS to use OpenMP or pthreads for parallelism? OpenMP is the default option, since OpenMP allows thread pinning and thus results in better performance. To build with OpenMP use: . ./blis_build_altramax.sh However, some platforms (like MacOS) cannot use OpenMP at all. In this case, you want to build the pThreads version of BLIS: . ./blis_build_altramax_pthreads.sh In both cases, it will create libblis.a in $BLIS_HOME/lib/$BLIS_ARCH Try doing that in the root blis directory, depending on your OS. LINUX: . ./blis_build_altramax.sh MacOS Apple Silicon: . ./blis_build_altramax_pthreads.sh ---------------------------------------------------------------------------- HOWEVER, there is a tricky case: If you link BLIS with a program that uses pThreads, you MUST use the pthreads version of BLIS, even though it will be slower. This is because there is a bug in which attempting to use both pthreads AND OpenMP will pin all threads to a single core and essentially freeze your program. If this is a possibility, you may want to have both libraries available and switch between them for each application. The script: . ./blis_build_both_libraries.sh will build both versions, with the pThreads version being called libblisP.a, and a second header blisP.h This is a little inconvenient, and we're working on improving the situation in the near future. ---------------------------------------------------------------------------- The build will additionally check the library, but if you would like to check a la carte, do . ./blis_test.sh You should see near the bottom: check-blastest.sh: All BLAS tests passed! check-blistest.sh: All BLIS tests passed! -------------------------------- Finally, here's a script that will be important when you are doing testing. This performs the important step of unsetting any parameters effecting blis parallelism. . ./blis_unset_par.sh =================================================== 3) Building and Linking your program with BLIS =================================================== This depends whether you are using the pThreads version of BLIS or the OpenMP version... Note this uses the BLIS locations automatically defined when sourcing blis_setenv.sh . ./blis_setenv.sh (This will display you environment variable settings, your blis libraries and headers (if built), and also unset blis parallelism parameters for safety.) // BUILDING your app with the OpenMP version of BLIS: // Note: Don't need -lomp on Linux gcc -fopenmp -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH MyFiles.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o MyExe // To build with pThreads gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH MyFiles.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o MyExe // NOTE: If you used the scripts to build BOTH versions of blis, then use the renamed blis lib: gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH MyFiles.c $BLIS_HOME/lib/$BLIS_ARCH/libblisP.a -lpthread -lm -o MyExe Let's try building a sample program that we've included to test BLIS: TimeDGEMM.c If this is a new terminal session, make sure to: . ./blis_setenv.sh (there's no harm in running it again.) Linux: gcc -fopenmp -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x Apple Silicon: gcc -O2 -g -I$BLIS_HOME/include/$BLIS_ARCH TimeDGEMM.c $BLIS_HOME/lib/$BLIS_ARCH/libblis.a -lpthread -lm -o time_gemm.x But don't try a timed run, yet - there's some runtime setup that needs to be done... =================================================== 4) Setting the environment parameters for an optimized blis to run your program =================================================== The performance of some BLAS libraries are very sensitive to the compiler or the page size. BLIS is not sensitive to either of these things, but it IS extremely dependent on pinning the right threads to the right cores. We have scripts to help... . ./blis_setenv.sh This not only tells you where blis is, but it also creates shell functions to set affinity, threading, and NUMA control for each run. There is a shell function created that you can call to set up how your threads will be pinned and used: blis_set_cores_and_sockets Specifying the number of sockets is important because BLIS is configured very differently for one vs two sockets. Example: # Set up for a run with 128 total cores, half on each of 2 sockets. blis_set_cores_and_sockets 128 2 You can also use the following aliases: blis_set_cores_1S 128 # Run 128 cores on 1 socket blis_set_cores_2S 256 # Run 256 cores across 2 sockets, 128 on each NOTE that at the moment, for multi-threaded BLIS, we only support active number of threads that are a multiple of 8. If you want to test single threaded performance, you can set export BLIS_NUM_THREADS=1 Launching your executable: If your application is MyExe, your commands to perform an optimized BLIS run might look like this: blis_set_cores_1S 128 $BLIS_NUMA MyExe This will set cpu affinity correctly, set BLIS parallelism correctly, set the NUMA mode correctly, and launch your EXE. --------------------------------------------------- Let's try an example using the executable that you created in section 3, remembering that if you're on an Apple Silicon Mac, make sure that you don't use more cores than you have. (For example, 8 on an M1 Max.) Apple Silicon: (No NUMA is needed for Apple platforms.) blis_set_cores_1S 8; ./time_gemm.x 8000 (in tests, we obtained about 95% of peak with Neon64 - about 366 Gigaflops) AltraMax Single Socket: blis_set_cores_1S 128; $BLIS_NUMA ./time_gemm.x 12000 (in tests, we obtained about 2.6 TF, or 85% of peak CONGRATULATIONS! You're ready to use BLIS! =================================================== Performance Note: =================================================== We continue to enhance BLIS performance on the Altramax. One current issue is that not all variants of triangular operations obtain full performance. For TRSM, best performance is with left triangular operations. For TRMM, DUAL SOCKET, best performance is with left triangular operations. For TRMM, SINGLE SOCKET, best performance is with right triangular operations. blis-1.1/config/altramax/QuickStart/blis_quick_start_uninstall_altramax.sh000077500000000000000000000015171474157777200273700ustar00rootroot00000000000000#!/bin/bash # This utility will remove all the configuration # Specific QuickStart files from the blis directory. # This is very useful when switching configurations! # if [[ -n "$BLIS_HOME" ]]; then echo "REMOVING ALL ALTRAMAX QUICKSTART FILES FROM $BLIS_HOME" rm $BLIS_HOME/blis_build_altramax_pthreads.sh rm $BLIS_HOME/blis_build_altramax.sh rm $BLIS_HOME/blis_build_both_libraries.sh rm $BLIS_HOME/blis_configure_altramax_pthreads.sh rm $BLIS_HOME/blis_configure_altramax.sh rm $BLIS_HOME/blis_quick_start_altramax.txt rm $BLIS_HOME/blis_setenv.sh rm $BLIS_HOME/blis_unset_par.sh rm $BLIS_HOME/blis_test.sh rm $BLIS_HOME/TimeDGEMM.c rm $BLIS_HOME/time_gemm.x rm $BLIS_HOME/blis_quick_start_uninstall_altramax.sh else echo "ONLY USE THIS SCRIPT FROM THE BLIS HOME DIRECTORY!" echo "BLIS_HOME is not set!" fi blis-1.1/config/altramax/QuickStart/blis_setenv.sh000077500000000000000000000236011474157777200223570ustar00rootroot00000000000000#!/bin/bash ####################################################################### # Brought to you by Oracle Labs ####################################################################### # Tested in bash and zsh ####################################################################### # Sets up all the environment variables needed for running blis. # For this reason, the script MUST be sourced, NOT executed! # Needs to be run from BLIS directory to have a portable definition of # BLIS_HOME. If this setup doesn't work for you, you may hard code # the path to BLIS_HOME, but then be careful if you copy or move it! ####################################################################### # This is the top level blis directory - it is recommended to set to an absolulte path # Can be overridden by user to be called anywhere, but then less portable # export BLIS_HOME=. # PORTABLE - Set BLIS_HOME to the blis directory containing this script # We need to get the full path to the file in case this is called from another directory if [ "$1" = "quiet" ]; then quiet_setenv="quiet" else quiet_setenv="" fi if [[ -n "$BASH_VERSION" ]] ; then file_path_and_name="$( dirname "${BASH_SOURCE[0]}" )/blis_set_home_dir.sh" else file_path_and_name="$( dirname "$0" )/blis_set_home_dir.sh" fi if [ -f "$file_path_and_name" ] ; then . $file_path_and_name quiet else echo "ERROR - this file is not being executed from a blis home directory." echo "If you cannot use this script in a home directory, you can hardcode" echo "the absolute location of BLIS_HOME in blis_setenv,bash, but this" echo "is then less portable and more error prone with multiple blis" echo "directories." return fi ####################################################################### # Platform Specific: # Important! Set the firmware flag to 204 for 2.04 or earlier, # and 205 for 2.05 or later. firmware=205 # Use altramax for both single and double socket - this might change export BLIS_ARCH="altramax" export BLIS_LIB=$BLIS_HOME/lib/$BLIS_ARCH export BLIS_INC=$BLIS_HOME/include/$BLIS_ARCH # Verify: if [ "$quiet_setenv" = "" ]; then echo "BLIS_HOME set to $BLIS_HOME" echo "BLIS_INC set to $BLIS_INC" echo "-----------------------------------------------------------------" ls -l $BLIS_INC echo "-----------------------------------------------------------------" echo "BLIS_LIB set to $BLIS_LIB" echo "-----------------------------------------------------------------" ls -l $BLIS_LIB echo "-----------------------------------------------------------------" fi # Affinity Macros, etc export BLIS_NUMA="numactl --localalloc" # Use with firmware versions 2.04 and earlier. # You can check the firmware version using dmidecode export BLIS_AFFINITY_2S_2_04="0 64 32 96 4 68 36 100 1 65 33 97 5 69 37 101 2 66 34 98 6 70 38 102 3 67 35 99 7 71 39 103 8 72 40 104 12 76 44 108 9 73 41 105 13 77 45 109 10 74 42 106 14 78 46 110 11 75 43 107 15 79 47 111 16 80 48 112 20 84 52 116 17 81 49 113 21 85 53 117 18 82 50 114 22 86 54 118 19 83 51 115 23 87 55 119 24 88 56 120 26 90 58 122 25 89 57 121 27 91 59 123 28 92 60 124 30 94 62 126 29 93 61 125 31 95 63 127 128 192 160 224 132 196 164 228 129 193 161 225 133 197 165 229 130 194 162 226 134 198 166 230 131 195 163 227 135 199 167 231 136 200 168 232 140 204 172 236 137 201 169 233 141 205 173 237 138 202 170 234 142 206 174 238 139 203 171 235 143 207 175 239 144 208 176 240 148 212 180 244 145 209 177 241 149 213 181 245 146 210 178 242 150 214 182 246 147 211 179 243 151 215 183 247 152 216 184 248 154 218 186 250 153 217 185 249 155 219 187 251 156 220 188 252 158 222 190 254 157 221 189 253 159 223 191 255" export BLIS_AFFINITY_1S_2_04="0 64 32 96 4 68 36 100 1 65 33 97 5 69 37 101 2 66 34 98 6 70 38 102 3 67 35 99 7 71 39 103 8 72 40 104 12 76 44 108 9 73 41 105 13 77 45 109 10 74 42 106 14 78 46 110 11 75 43 107 15 79 47 111 16 80 48 112 20 84 52 116 17 81 49 113 21 85 53 117 18 82 50 114 22 86 54 118 19 83 51 115 23 87 55 119 24 88 56 120 26 90 58 122 25 89 57 121 27 91 59 123 28 92 60 124 30 94 62 126 29 93 61 125 31 95 63 127" # Use with firmware versions 2.05 and later # You can check the firmware version using dmidecode export BLIS_AFFINITY_2S_2_05="0 1 64 65 8 9 72 73 2 3 66 67 10 11 74 75 4 5 68 69 12 13 76 77 6 7 70 71 14 15 78 79 16 17 80 81 24 25 88 89 18 19 82 83 26 27 90 91 20 21 84 85 28 29 92 93 22 23 86 87 30 31 94 95 32 33 96 97 40 41 104 105 34 35 98 99 42 43 106 107 36 37 100 101 44 45 108 109 38 39 102 103 46 47 110 111 48 49 112 113 52 53 116 117 50 51 114 115 54 55 118 119 56 57 120 121 60 61 124 125 58 59 122 123 62 63 126 127 128 129 192 193 136 137 200 201 130 131 194 195 138 139 202 203 132 133 196 197 140 141 204 205 134 135 198 199 142 143 206 207 144 145 208 209 152 153 216 217 146 147 210 211 154 155 218 219 148 149 212 213 156 157 220 221 150 151 214 215 158 159 222 223 160 161 224 225 168 169 232 233 162 163 226 227 170 171 234 235 164 165 228 229 172 173 236 237 166 167 230 231 174 175 238 239 176 177 240 241 180 181 244 245 178 179 242 243 182 183 246 247 184 185 248 249 188 189 252 253 186 187 250 251 190 191 254 255" export BLIS_AFFINITY_1S_2_05="0 1 64 65 8 9 72 73 2 3 66 67 10 11 74 75 4 5 68 69 12 13 76 77 6 7 70 71 14 15 78 79 16 17 80 81 24 25 88 89 18 19 82 83 26 27 90 91 20 21 84 85 28 29 92 93 22 23 86 87 30 31 94 95 32 33 96 97 40 41 104 105 34 35 98 99 42 43 106 107 36 37 100 101 44 45 108 109 38 39 102 103 46 47 110 111 48 49 112 113 52 53 116 117 50 51 114 115 54 55 118 119 56 57 120 121 60 61 124 125 58 59 122 123 62 63 126 127" # Parallelism on the Altramax is very flat: # Set JC to number of sockets: export BLIS_JC_NT=2 # Set JR to groups of 8: export BLIS_HR_NT=8 # Set IC to the number of cores per socket / 8: export BLIS_IC_NT=16 # Experimental: Allow you to set threading and # Core affinity on single or dual sockets for # N threads. Currently, we only support N as # a multple of 8 # Maximum Altramax cores per socket CPS=128 # Use Bash Arrays: if (($firmware == 204)); then arrayCoreIDs=(0 64 32 96 4 68 36 100 1 65 33 97 5 69 37 101 2 66 34 98 6 70 38 102 3 67 35 99 7 71 39 103 8 72 40 104 12 76 44 108 9 73 41 105 13 77 45 109 10 74 42 106 14 78 46 110 11 75 43 107 15 79 47 111 16 80 48 112 20 84 52 116 17 81 49 113 21 85 53 117 18 82 50 114 22 86 54 118 19 83 51 115 23 87 55 119 24 88 56 120 26 90 58 122 25 89 57 121 27 91 59 123 28 92 60 124 30 94 62 126 29 93 61 125 31 95 63 127 128 192 160 224 132 196 164 228 129 193 161 225 133 197 165 229 130 194 162 226 134 198 166 230 131 195 163 227 135 199 167 231 136 200 168 232 140 204 172 236 137 201 169 233 141 205 173 237 138 202 170 234 142 206 174 238 139 203 171 235 143 207 175 239 144 208 176 240 148 212 180 244 145 209 177 241 149 213 181 245 146 210 178 242 150 214 182 246 147 211 179 243 151 215 183 247 152 216 184 248 154 218 186 250 153 217 185 249 155 219 187 251 156 220 188 252 158 222 190 254 157 221 189 253 159 223 191 255) elif (($firmware == 205)); then arrayCoreIDs=(0 1 64 65 8 9 72 73 2 3 66 67 10 11 74 75 4 5 68 69 12 13 76 77 6 7 70 71 14 15 78 79 16 17 80 81 24 25 88 89 18 19 82 83 26 27 90 91 20 21 84 85 28 29 92 93 22 23 86 87 30 31 94 95 32 33 96 97 40 41 104 105 34 35 98 99 42 43 106 107 36 37 100 101 44 45 108 109 38 39 102 103 46 47 110 111 48 49 112 113 52 53 116 117 50 51 114 115 54 55 118 119 56 57 120 121 60 61 124 125 58 59 122 123 62 63 126 127 128 129 192 193 136 137 200 201 130 131 194 195 138 139 202 203 132 133 196 197 140 141 204 205 134 135 198 199 142 143 206 207 144 145 208 209 152 153 216 217 146 147 210 211 154 155 218 219 148 149 212 213 156 157 220 221 150 151 214 215 158 159 222 223 160 161 224 225 168 169 232 233 162 163 226 227 170 171 234 235 164 165 228 229 172 173 236 237 166 167 230 231 174 175 238 239 176 177 240 241 180 181 244 245 178 179 242 243 182 183 246 247 184 185 248 249 188 189 252 253 186 187 250 251 190 191 254 255) else echo "ERROR - UNSUPPORTED FIRMWARE $firmware" exit -1 fi # Brief check: @ = list all numbers, loop for i in ${}; do ... done # for Array Size, do ${#arr[@]} # echo "CoreID array has ${#arrayCoreIDs[@]} elements" # echo "CoreID array set to: ${arrayCoreIDs[@]}" # Give the TOTAL core count: # Single socket runs blis_set_cores_and_sockets() { cores=$1 sockets=$2 # echo "Cores = $cores, sockets=$sockets" # Round up to nearest 8 cores per socket: cores_per_group=8 if (( $sockets == 2 )); then cores_per_group=16; fi core_round_inc=$(($cores_per_group-1)) cores_per_socket=$(($cores)) cores=$(($cores + $core_round_inc)) groups_per_socket=$(($cores / $cores_per_group)) rounded_cores=$(( $groups_per_socket * $cores_per_group )) # echo "Rounded Cores = $rounded_cores" # echo "Groups Per Socket = $groups_per_socket" # set the parallelism for one socket with N cores: # Set JC to number of sockets: export BLIS_JC_NT=$sockets # Set JR to groups of 8: export BLIS_JR_NT=8 # Set IC to the number of cores per socket / 8: export BLIS_IC_NT=$groups_per_socket # Using an old version of zsh syntax that's compatible with bash if (( $sockets == 1 )); then # Simple single socket case # quotes # export GOMP_CPU_AFFINITY="\"${arrayCoreIDs[@]:0:$rounded_cores}\"" # No quotes... export GOMP_CPU_AFFINITY="${arrayCoreIDs[@]:0:$rounded_cores}" else # Dual socket case half_cores=$(( $rounded_cores / 2 )) # echo "Half cores are $half_cores" # quotes # export GOMP_CPU_AFFINITY="\"${arrayCoreIDs[@]:0:$half_cores} ${arrayCoreIDs[@]:$CPS:$half_cores}\"" # No quotes export GOMP_CPU_AFFINITY="${arrayCoreIDs[@]:0:$half_cores} ${arrayCoreIDs[@]:$CPS:$half_cores}" fi echo "Activating $rounded_cores cores across $sockets sockets..." echo "GOMP_CPU_AFFINITY set to $GOMP_CPU_AFFINITY" echo "JC/IC/JR = $BLIS_JC_NT/$BLIS_IC_NT/$BLIS_JR_NT" } # Convenience functions: blis_set_cores_1S() { blis_set_cores_and_sockets $1 1 ; } blis_set_cores_2S() { blis_set_cores_and_sockets $1 2 ; } # For safety: . ./blis_unset_par.sh blis-1.1/config/altramax/QuickStart/blis_test.sh000077500000000000000000000011271474157777200220310ustar00rootroot00000000000000#!/bin/bash if [ "$1" = "quiet" ]; then quiet_blistest="quiet" else quiet_blistest="" fi # We don't want to quiet this part: echo "#################################################################" echo "Simple testing of BLIS - use testsuite for more extensive tests." echo "#################################################################" . ./blis_setenv.sh $quiet_blistest # It's critical to unset parallelism parameters before # running the test code! . ./blis_unset_par.sh quiet echo "Switching to directory $BLIS_HOME" pushd $BLIS_HOME > /dev/null make check -j popd > /dev/null blis-1.1/config/altramax/QuickStart/blis_unset_par.sh000077500000000000000000000007541474157777200230570ustar00rootroot00000000000000#!/bin/blis if [ "$1" = "quiet" ]; then quiet_unsetpar="quiet" else quiet_unsetpar="" fi if [ "$quiet_unsetpar" = "" ]; then echo "#########################################################" echo " UNSETTING BLIS ENVIRONMENT VARIABLES THAT SET THREADING" echo " AND AFFINITY." echo "#########################################################" fi unset BLIS_JC_NT unset BLIS_JR_NT unset BLIS_IC_NT unset BLIS_NUM_THREADS unset OMP_NUM_THREADS unset GOMP_CPU_AFFINITY blis-1.1/config/altramax/bli_cntx_init_altramax.c000066400000000000000000000066631474157777200223040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2022, Oracle Labs, Oracle Corporation Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_altramax( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_altramax_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 192, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 480, -1, -1 ); // Changed d to 480 - LDR // bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 6144, -1, -1 ); // Doubled NC bli_blksz_init_easy( &blkszs[ BLIS_NC ], 12288, 8192, -1, -1 ); // Increased NC slightly more // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/altramax/bli_family_altramax.h000066400000000000000000000037631474157777200215710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // Version with 16 byte alignment and jr=8 #define BLIS_THREAD_MAX_JR 8 // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #define BLIS_FORCE_ROLL_PACKM_REF_KERNEL #define BLIS_DISABLE_TRMM_RIGHT_IF_JC_GT_1_ELSE_DISABLE_LEFT_IF_DP blis-1.1/config/altramax/bli_kernel_defs_altramax.h000066400000000000000000000035731474157777200225700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 6 #define BLIS_NR_s 12 #define BLIS_NR_d 8 //#endif blis-1.1/config/altramax/make_defs.mk000066400000000000000000000057011474157777200176610ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := altramax #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -mcpu=neoverse-n1 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=neoverse-n1 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mcpu=neoverse-n1 else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/amd64/000077500000000000000000000000001474157777200145115ustar00rootroot00000000000000blis-1.1/config/amd64/bli_family_amd64.h000066400000000000000000000033321474157777200177650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_FAMILY_AMD64_H #define BLIS_FAMILY_AMD64_H #endif blis-1.1/config/amd64/make_defs.mk000066400000000000000000000050351474157777200167630ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := amd64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Setting for reference and optimized kernels are taken from individual # subconfiguration makefile fragments in this family. # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/amd64_legacy/000077500000000000000000000000001474157777200160355ustar00rootroot00000000000000blis-1.1/config/amd64_legacy/bli_family_amd64_legacy.h000066400000000000000000000035131474157777200226360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_FAMILY_AMD64_LEGACY_H #define BLIS_FAMILY_AMD64_LEGACY_H // Placeholder for bundle configuration. #endif blis-1.1/config/amd64_legacy/make_defs.mk000066400000000000000000000051411474157777200203050ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := amd64_legacy #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Setting for reference and optimized kernels are taken from individual # subconfiguration makefile fragments in this family. # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/arm32/000077500000000000000000000000001474157777200145225ustar00rootroot00000000000000blis-1.1/config/arm32/bli_family_arm32.h000066400000000000000000000034721474157777200200140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 //#endif blis-1.1/config/arm32/make_defs.mk000066400000000000000000000056741474157777200170050ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := arm32 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mfloat-abi=hard -mfpu=neon CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv7-a else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/arm64/000077500000000000000000000000001474157777200145275ustar00rootroot00000000000000blis-1.1/config/arm64/bli_family_arm64.h000066400000000000000000000042701474157777200200230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #define BLIS_SIMD_MAX_SIZE 128 // Note: The default is 64. #define BLIS_SIMD_MAX_NUM_REGISTERS 32 // SVE-specific configs. #define N_L1_SVE_DEFAULT 64 #define W_L1_SVE_DEFAULT 4 #define C_L1_SVE_DEFAULT 256 #define N_L2_SVE_DEFAULT 2048 #define W_L2_SVE_DEFAULT 16 #define C_L2_SVE_DEFAULT 256 #define N_L3_SVE_DEFAULT 8192 #define W_L3_SVE_DEFAULT 16 #define C_L3_SVE_DEFAULT 256 //#endif blis-1.1/config/arm64/make_defs.mk000066400000000000000000000057751474157777200170140ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := arm64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv8-a else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -march=armv8-a else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/armsve/000077500000000000000000000000001474157777200150735ustar00rootroot00000000000000blis-1.1/config/armsve/bli_cntx_init_armsve.c000066400000000000000000000110421474157777200214370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include #ifndef HWCAP_SVE #define HWCAP_SVE (1 << 22) #endif void bli_cntx_init_armsve( cntx_t* cntx ) { if (!(getauxval( AT_HWCAP ) & HWCAP_SVE)) return; blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_armsve_ref( cntx ); // ------------------------------------------------------------------------- // Block size. dim_t m_r_s, n_r_s, k_c_s, m_c_s, n_c_s; dim_t m_r_d, n_r_d, k_c_d, m_c_d, n_c_d; dim_t m_r_c, n_r_c, k_c_c, m_c_c, n_c_c; dim_t m_r_z, n_r_z, k_c_z, m_c_z, n_c_z; bli_s_blksz_armsve(&m_r_s, &n_r_s, &k_c_s, &m_c_s, &n_c_s); bli_d_blksz_armsve(&m_r_d, &n_r_d, &k_c_d, &m_c_d, &n_c_d); bli_c_blksz_armsve(&m_r_c, &n_r_c, &k_c_c, &m_c_c, &n_c_c); bli_z_blksz_armsve(&m_r_z, &n_r_z, &k_c_z, &m_c_z, &n_c_z); // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 // These are vector-length agnostic kernels. Yet knowing mr is required at runtime. BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armsve_asm_2vx10_unindexed, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armsve_asm_2vx10_unindexed, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_armsve_asm_2vx10_unindexed, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_armsve_asm_2vx10_unindexed, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Set VL-specific packing routines if applicable. if ( m_r_d == 16 ) { bli_cntx_set_ukrs ( cntx, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_16xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_armsve512_asm_10xk, BLIS_VA_END ); } else if ( m_r_d == 8 ) { bli_cntx_set_ukrs ( cntx, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_armsve256_int_8xk, BLIS_VA_END ); } // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], m_r_s, m_r_d, m_r_c, m_r_z ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], n_r_s, n_r_d, n_r_c, n_r_z ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], m_c_s, m_c_d, m_c_c, m_c_z ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], k_c_s, k_c_d, k_c_c, k_c_z ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], n_c_s, n_c_d, n_c_c, n_c_z ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/armsve/bli_family_armsve.h000066400000000000000000000042031474157777200207270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 256 #define BLIS_SIMD_MAX_NUM_REGISTERS 32 // SVE-specific configs. #define N_L1_SVE_DEFAULT 64 #define W_L1_SVE_DEFAULT 4 #define C_L1_SVE_DEFAULT 256 #define N_L2_SVE_DEFAULT 2048 #define W_L2_SVE_DEFAULT 16 #define C_L2_SVE_DEFAULT 256 #define N_L3_SVE_DEFAULT 8192 #define W_L3_SVE_DEFAULT 16 #define C_L3_SVE_DEFAULT 256 //#endif blis-1.1/config/armsve/bli_kernel_defs_armsve.h000066400000000000000000000042321474157777200217310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- // // The armsve configuration handles both 256-bit and 512-bit SVE vectors, // so it is not possible to define specific register block sizes. Thus, // armsve can't use reference kernels! // #define BLIS_MR_s -1 #define BLIS_MR_d -1 #define BLIS_MR_c -1 #define BLIS_MR_z -1 #define BLIS_NR_s 10 #define BLIS_NR_d 10 #define BLIS_NR_c 10 #define BLIS_NR_z 10 //#endif blis-1.1/config/armsve/make_defs.mk000066400000000000000000000055501474157777200173470ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := armsve #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -ftree-vectorize -march=armv8-a+sve endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) CKVECFLAGS := # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/bgq/000077500000000000000000000000001474157777200143475ustar00rootroot00000000000000blis-1.1/config/bgq/bli_cntx_init_bgq.c000066400000000000000000000063471474157777200202030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_bgq( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_bgq_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_bgq_int_8x8, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_bgq_int_4x4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, 8, -1, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 1024, -1, 768 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 2048, -1, 1536 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 10240, -1, 10240 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/bgq/bli_family_bgq.h000066400000000000000000000062131474157777200174620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #undef restrict #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 1024 #define BLIS_DEFAULT_KC_S 2048 #define BLIS_DEFAULT_NC_S 8192 // 1 MPI RANK CASE: #define BLIS_DGEMM_UKERNEL bli_dgemm_int_8x8 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DEFAULT_MC_D 1024 #define BLIS_DEFAULT_KC_D 2048 #define BLIS_DEFAULT_NC_D 10240 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 1024 #define BLIS_DEFAULT_KC_C 2048 #define BLIS_DEFAULT_NC_C 8192 #define BLIS_ZGEMM_UKERNEL bli_zgemm_int_8x8 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 768 #define BLIS_DEFAULT_KC_Z 1536 #define BLIS_DEFAULT_NC_Z 10240 // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ #define BLIS_DEFAULT_AF_D 8 #define BLIS_DAXPYF_KERNEL bli_daxpyf_opt_var1 // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- #define BLIS_DAXPYV_KERNEL bli_daxpyv_opt_var1 #define BLIS_DDOTV_KERNEL bli_ddotv_opt_var1 #endif //#endif blis-1.1/config/bgq/bli_kernel_defs_bgq.h000066400000000000000000000035721474157777200204670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_d 8 #define BLIS_MR_z 4 #define BLIS_NR_d 8 #define BLIS_NR_z 4 //#endif blis-1.1/config/bgq/make_defs.mk000066400000000000000000000071441474157777200166240ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := bgq #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # #ifeq ($(CC),) #CC := /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpixlc_r #CC_VENDOR := ibm #endif # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk ifeq ($(CC_VENDOR),ibm) CMISCFLAGS := -qthreaded -qsmp=omp -qasm=gcc -qkeyword=asm # -qreport -qsource -qlistopt -qlist else ifeq ($(CC_VENDOR),clang) CMISCFLAGS := -fopenmp else $(error xlc or bgclang is required for this configuration.) endif CPICFLAGS := -fPIC CWARNFLAGS := -w ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),ibm) CKVECFLAGS := -qarch=qp -qtune=qp -qsimd=auto -qhot=level=1 -qprefetch -qunroll=yes -qnoipa endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Override the default value for LDFLAGS. ifeq ($(CC_VENDOR),ibm) LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -qthreaded -qsmp=omp else ifeq ($(CC_VENDOR),clang) LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -fopenmp endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/bulldozer/000077500000000000000000000000001474157777200156005ustar00rootroot00000000000000blis-1.1/config/bulldozer/bli_cntx_init_bulldozer.c000066400000000000000000000067611474157777200226650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_bulldozer( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_bulldozer_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_bulldozer_asm_8x8_fma4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_bulldozer_asm_4x6_fma4, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_bulldozer_asm_8x4_fma4, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_bulldozer_asm_4x4_fma4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 4, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 8, 6, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 128, 1080, 96, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 384, 120, 256, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 8400, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/bulldozer/bli_family_bulldozer.h000066400000000000000000000054231474157777200221460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_8x8_fma4 #define BLIS_DEFAULT_MC_S 128 #define BLIS_DEFAULT_KC_S 384 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 8 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x6_fma4 #define BLIS_DEFAULT_MC_D 1080 #define BLIS_DEFAULT_KC_D 120 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 6 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x4_fma4 #define BLIS_DEFAULT_MC_C 96 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x4_fma4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 192 #define BLIS_DEFAULT_NC_Z 4096 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 #endif //#endif blis-1.1/config/bulldozer/bli_kernel_defs_bulldozer.h000066400000000000000000000037221474157777200231460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 4 #define BLIS_MR_c 8 #define BLIS_MR_z 4 #define BLIS_NR_s 8 #define BLIS_NR_d 6 #define BLIS_NR_c 4 #define BLIS_NR_z 4 //#endif blis-1.1/config/bulldozer/make_defs.mk000066400000000000000000000061341474157777200200530ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := bulldozer #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/cortexa15/000077500000000000000000000000001474157777200154115ustar00rootroot00000000000000blis-1.1/config/cortexa15/bli_cntx_init_cortexa15.c000066400000000000000000000071541474157777200223040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa15( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa15_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_int_4x4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_int_4x4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z #if 1 bli_blksz_init_easy( &blkszs[ BLIS_MR ], 4, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 336, 176, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 528, 368, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, -1, -1 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 176, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 368, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4096, -1, -1 ); #endif // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/cortexa15/bli_family_cortexa15.h000066400000000000000000000055721474157777200215750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 336 #define BLIS_DEFAULT_KC_S 528 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 176 #define BLIS_DEFAULT_KC_D 368 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-1.1/config/cortexa15/bli_kernel_defs_cortexa15.h000066400000000000000000000035721474157777200225730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 4 #define BLIS_MR_d 4 #define BLIS_NR_s 4 #define BLIS_NR_d 4 //#endif blis-1.1/config/cortexa15/make_defs.mk000066400000000000000000000057021474157777200176640ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa15 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mfloat-abi=hard -mfpu=neon CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=cortex-a15 else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/cortexa53/000077500000000000000000000000001474157777200154135ustar00rootroot00000000000000blis-1.1/config/cortexa53/bli_cntx_init_cortexa53.c000066400000000000000000000063621474157777200223100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa53( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa53_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/cortexa53/bli_family_cortexa53.h000066400000000000000000000034101474157777200215660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 blis-1.1/config/cortexa53/bli_kernel_defs_cortexa53.h000066400000000000000000000035731474157777200226000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 6 #define BLIS_NR_s 12 #define BLIS_NR_d 8 //#endif blis-1.1/config/cortexa53/make_defs.mk000066400000000000000000000060531474157777200176660ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa53 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -mcpu=cortex-a53 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=cortex-a53 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mcpu=cortex-a53 else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/cortexa57/000077500000000000000000000000001474157777200154175ustar00rootroot00000000000000blis-1.1/config/cortexa57/bli_cntx_init_cortexa57.c000066400000000000000000000063621474157777200223200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa57( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa57_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/cortexa57/bli_family_cortexa57.h000066400000000000000000000060241474157777200216020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_opt_8x12 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 12 #define BLIS_DEFAULT_MC_S 120 //1536 //336 //416 // 1280 //160 // 160 // 160 //2048 //336 #define BLIS_DEFAULT_KC_S 640 //1536 //336 //704 //1280 //672 //528 // 856 //2048 //528 #define BLIS_DEFAULT_NC_S 3072 #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_6x8 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DEFAULT_MC_D 120 //1536 //160 //80 //176 #define BLIS_DEFAULT_KC_D 240 //1536 //304 //336 //368 #define BLIS_DEFAULT_NC_D 3072 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-1.1/config/cortexa57/bli_kernel_defs_cortexa57.h000066400000000000000000000035731474157777200226100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 6 #define BLIS_NR_s 12 #define BLIS_NR_d 8 //#endif blis-1.1/config/cortexa57/make_defs.mk000066400000000000000000000060471474157777200176750ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa57 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -mcpu=cortex-a57 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=cortex-a57 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mcpu=cortex-a57 else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/cortexa9/000077500000000000000000000000001474157777200153345ustar00rootroot00000000000000blis-1.1/config/cortexa9/bli_cntx_init_cortexa9.c000066400000000000000000000063571474157777200221560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa9( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa9_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_int_4x4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_int_4x4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 4, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 432, 176, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 352, 368, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/cortexa9/bli_family_cortexa9.h000066400000000000000000000055661474157777200214460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 432 #define BLIS_DEFAULT_KC_S 352 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 176 #define BLIS_DEFAULT_KC_D 368 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-1.1/config/cortexa9/bli_kernel_defs_cortexa9.h000066400000000000000000000035721474157777200224410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 4 #define BLIS_MR_d 4 #define BLIS_NR_s 4 #define BLIS_NR_d 4 //#endif blis-1.1/config/cortexa9/make_defs.mk000066400000000000000000000057001474157777200176050ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa9 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mfloat-abi=hard -mfpu=neon CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=cortex-a9 else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/excavator/000077500000000000000000000000001474157777200155725ustar00rootroot00000000000000blis-1.1/config/excavator/bli_cntx_init_excavator.c000066400000000000000000000067421474157777200226500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_excavator( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_excavator_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_piledriver_asm_16x3, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_piledriver_asm_8x3, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_piledriver_asm_4x2, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_piledriver_asm_2x2, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 4, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 3, 3, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 528, 264, 264, 100 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 320 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8400, 8400, 8400, 8400 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/excavator/bli_family_excavator.h000066400000000000000000000057351474157777200221400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x3 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 3 #define BLIS_DEFAULT_MC_S 528 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 8400 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x3 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 3 #define BLIS_DEFAULT_MC_D 264 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_4x2 #define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 2 #define BLIS_DEFAULT_MC_C 264 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 8400 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_2x2 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 2 #define BLIS_DEFAULT_MC_Z 100 #define BLIS_DEFAULT_KC_Z 320 #define BLIS_DEFAULT_NC_Z 8400 #endif //#endif blis-1.1/config/excavator/bli_kernel_defs_excavator.h000066400000000000000000000037231474157777200231330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 16 #define BLIS_MR_d 8 #define BLIS_MR_c 4 #define BLIS_MR_z 2 #define BLIS_NR_s 3 #define BLIS_NR_d 3 #define BLIS_NR_c 2 #define BLIS_NR_z 2 //#endif blis-1.1/config/excavator/make_defs.mk000066400000000000000000000061561474157777200200510ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := excavator #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/firestorm/000077500000000000000000000000001474157777200156105ustar00rootroot00000000000000blis-1.1/config/firestorm/bli_cntx_init_firestorm.c000066400000000000000000000136531474157777200227030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_firestorm( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_firestorm_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_12x8r, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_8x6r, // packm BLIS_PACKM_MRXK_KER, BLIS_FLOAT, bli_spackm_armv8a_int_12xk, BLIS_PACKM_NRXK_KER, BLIS_FLOAT, bli_spackm_armv8a_int_8xk, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_armv8a_int_8xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_armv8a_int_6xk, // gemmsup BLIS_GEMMSUP_RRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, BLIS_GEMMSUP_RRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_armv8a_asm_6x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, BLIS_GEMMSUP_CRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_armv8a_asm_6x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmsup BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 480, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 4096, 3072, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 9600, 8184, -1, -1 ); // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MT ], -1, 99, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NT ], -1, 99, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KT ], -1, 99, -1, -1 ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR_SUP ], -1, 6, -1, -1, -1, 9, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_NR_SUP ], -1, 8, -1, -1, -1, 13, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC_SUP ], -1, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC_SUP ], -1, 1024, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC_SUP ], -1, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // sup thresholds BLIS_MT, &blkszs[ BLIS_MT ], BLIS_MT, BLIS_NT, &blkszs[ BLIS_NT ], BLIS_NT, BLIS_KT, &blkszs[ BLIS_KT ], BLIS_KT, // level-3 sup BLIS_NC_SUP, &blkszs[ BLIS_NC_SUP ], BLIS_NR_SUP, BLIS_KC_SUP, &blkszs[ BLIS_KC_SUP ], BLIS_KR_SUP, BLIS_MC_SUP, &blkszs[ BLIS_MC_SUP ], BLIS_MR_SUP, BLIS_NR_SUP, &blkszs[ BLIS_NR_SUP ], BLIS_NR_SUP, BLIS_MR_SUP, &blkszs[ BLIS_MR_SUP ], BLIS_MR_SUP, BLIS_VA_END ); } blis-1.1/config/firestorm/bli_family_firestorm.h000066400000000000000000000060241474157777200221640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_opt_8x12 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 12 #define BLIS_DEFAULT_MC_S 120 //1536 //336 //416 // 1280 //160 // 160 // 160 //2048 //336 #define BLIS_DEFAULT_KC_S 640 //1536 //336 //704 //1280 //672 //528 // 856 //2048 //528 #define BLIS_DEFAULT_NC_S 3072 #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_6x8 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DEFAULT_MC_D 120 //1536 //160 //80 //176 #define BLIS_DEFAULT_KC_D 240 //1536 //304 //336 //368 #define BLIS_DEFAULT_NC_D 3072 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-1.1/config/firestorm/bli_kernel_defs_firestorm.h000066400000000000000000000035731474157777200231720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 6 #define BLIS_NR_s 12 #define BLIS_NR_d 8 //#endif blis-1.1/config/firestorm/make_defs.mk000066400000000000000000000055761474157777200200740ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := firestorm #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -march=armv8-a endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize CKVECFLAGS := -march=armv8-a # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/generic/000077500000000000000000000000001474157777200152125ustar00rootroot00000000000000blis-1.1/config/generic/bli_cntx_init_generic.c000066400000000000000000000034421474157777200217020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_generic( cntx_t* cntx ) { // Set default kernel blocksizes and functions. bli_cntx_init_generic_ref( cntx ); } blis-1.1/config/generic/bli_family_generic.h000066400000000000000000000033071474157777200211710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-1.1/config/generic/bli_kernel_defs_generic.h000066400000000000000000000034401474157777200221670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- //#endif blis-1.1/config/generic/make_defs.mk000066400000000000000000000061101474157777200174570ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := generic #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else ifeq ($(CC_VENDOR),nvc) CKVECFLAGS := else $(error gcc, icc, nvc, or clang is required for this configuration.) endif endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/haswell/000077500000000000000000000000001474157777200152355ustar00rootroot00000000000000blis-1.1/config/haswell/bli_cntx_init_haswell.c000066400000000000000000000247431474157777200217570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_haswell( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_haswell_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // gemm #if 1 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, #else BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_16x6, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_8x6, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_8x3, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_4x3, #endif // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, #if 1 // packm BLIS_PACKM_MRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_16xk, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_4xk, #endif // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif // gemmsup BLIS_GEMMSUP_RRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_CRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // gemm #if 1 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, #else BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, #endif // gemmtrsm_l BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmsup BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z #if 1 bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1008, 1008, 1008, 1008 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, 75, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 6, 6, 3, 3 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1024, 1024, 1024, 1024 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 112, 64, 56, 32 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 112, 72, 56, 44 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 ); // ------------------------------------------------------------------------- // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MT ], 201, 201, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NT ], 201, 201, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KT ], 201, 201, -1, -1 ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR_SUP ], 6, 6, -1, -1, 9, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR_SUP ], 16, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC_SUP ], 168, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC_SUP ], 256, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC_SUP ], 4080, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, // gemmsup thresholds BLIS_MT, &blkszs[ BLIS_MT ], BLIS_MT, BLIS_NT, &blkszs[ BLIS_NT ], BLIS_NT, BLIS_KT, &blkszs[ BLIS_KT ], BLIS_KT, // level-3 sup BLIS_NC_SUP, &blkszs[ BLIS_NC_SUP ], BLIS_NR_SUP, BLIS_KC_SUP, &blkszs[ BLIS_KC_SUP ], BLIS_KR_SUP, BLIS_MC_SUP, &blkszs[ BLIS_MC_SUP ], BLIS_MR_SUP, BLIS_NR_SUP, &blkszs[ BLIS_NR_SUP ], BLIS_NR_SUP, BLIS_MR_SUP, &blkszs[ BLIS_MR_SUP ], BLIS_MR_SUP, BLIS_VA_END ); } blis-1.1/config/haswell/bli_family_haswell.h000066400000000000000000000114211474157777200212330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- // -- sgemm micro-kernel -- #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_4x24 #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 24 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 1 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 6 #define BLIS_DEFAULT_NR_S 16 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 6 #endif // -- dgemm micro-kernel -- #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x12 #define BLIS_DEFAULT_MC_D 152 #define BLIS_DEFAULT_KC_D 160 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 12 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 1 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 6 #endif // -- cgemm micro-kernel -- #if 1 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 3 #define BLIS_DEFAULT_NR_C 8 #define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x3 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 3 #endif // -- zgemm micro-kernel -- #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x3 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 3 #endif #endif //#endif blis-1.1/config/haswell/bli_kernel_defs_haswell.h000066400000000000000000000037231474157777200222410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 6 #define BLIS_MR_d 6 #define BLIS_MR_c 3 #define BLIS_MR_z 3 #define BLIS_NR_s 16 #define BLIS_NR_d 8 #define BLIS_NR_c 8 #define BLIS_NR_z 4 //#endif blis-1.1/config/haswell/make_defs.mk000066400000000000000000000066401474157777200175120ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := haswell #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. # NOTE: The -fomit-frame-pointer option is needed for some kernels because # they make explicit use of the rbp register. CKOPTFLAGS := $(COPTFLAGS) -O3 -fomit-frame-pointer ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell ifeq ($(GCC_OT_4_9_0),yes) # If gcc is older than 4.9.0, we must use a different label for -march. CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 endif else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xCORE-AVX2 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/intel64/000077500000000000000000000000001474157777200150635ustar00rootroot00000000000000blis-1.1/config/intel64/bli_family_intel64.h000066400000000000000000000033061474157777200207120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-1.1/config/intel64/make_defs.mk000066400000000000000000000061321474157777200173340ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := intel64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xSSSE3 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/knc/000077500000000000000000000000001474157777200143515ustar00rootroot00000000000000blis-1.1/config/knc/bli_cntx_init_knc.c000066400000000000000000000064061474157777200202030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_knc( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_knc_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_knc_asm_30x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, 30, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 120, -1, -1, -1, 160, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_KC ], -1, 240, -1, -1, -1, 300, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 14400, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/knc/bli_family_knc.h000066400000000000000000000067241474157777200174750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- THREADING PARAMTERS ------------------------------------------------------ #define BLIS_TREE_BARRIER #define BLIS_TREE_BARRIER_ARITY 4 // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 64 #define BLIS_SIMD_MAX_SIZE 64 #define BLIS_SIMD_MAX_NUM_REGISTERS 32 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_30x16 #define BLIS_DEFAULT_MR_S 30 #define BLIS_DEFAULT_NR_S 16 #define BLIS_DEFAULT_MC_S 240 #define BLIS_DEFAULT_KC_S 240 #define BLIS_DEFAULT_NC_S 14400 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_30x8 #define BLIS_DEFAULT_MR_D 30 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DEFAULT_MC_D 120 #define BLIS_DEFAULT_KC_D 240 #define BLIS_DEFAULT_NC_D 14400 #define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) #define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) #define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + 0) #define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) #define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) #define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + 0) #define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + 2) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) #define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + 2) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) #endif //#endif blis-1.1/config/knc/bli_kernel_defs_knc.h000066400000000000000000000035531474157777200204720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_d 30 #define BLIS_NR_d 8 #define BLIS_PACKMR_d 32 //#endif blis-1.1/config/knc/make_defs.mk000066400000000000000000000061071474157777200166240ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := knc #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mmic -fasm-blocks CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),icc) CKVECFLAGS := else $(error icc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Override the default value for LDFLAGS. LDFLAGS := -mmic # Never use libm with Intel compilers. ifneq ($(CC_VENDOR),icc) LDFLAGS += $(LIBM) endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/knl/000077500000000000000000000000001474157777200143625ustar00rootroot00000000000000blis-1.1/config/knl/bli_cntx_init_knl.c000066400000000000000000000114711474157777200202230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_knl( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_knl_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_knl_asm_24x16, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_knl_asm_24x8, // packm BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_knl_asm_24xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_knl_asm_8xk, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, #if 1 // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, #endif // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 24, 24, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_MC ], 240, 120, -1, -1, 288, 144, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_KC ], 336, 336, -1, -1, 408, 408, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 14400, 14400, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, BLIS_VA_END ); } blis-1.1/config/knl/bli_family_knl.h000066400000000000000000000116171474157777200175140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- THREADING PARAMETERS ----------------------------------------------------- #define BLIS_THREAD_RATIO_M 4 #define BLIS_THREAD_RATIO_N 1 #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 // -- MEMORY ALLOCATION -------------------------------------------------------- //#define BLIS_TREE_BARRIER //#define BLIS_TREE_BARRIER_ARITY 4 #define BLIS_SIMD_ALIGN_SIZE 64 #define BLIS_SIMD_MAX_SIZE 64 #define BLIS_SIMD_MAX_NUM_REGISTERS 32 /* #ifdef BLIS_NO_HBWMALLOC #include #define BLIS_MALLOC_POOL malloc #define BLIS_FREE_POOL free #else #include #define BLIS_MALLOC_POOL hbw_malloc #define BLIS_FREE_POOL hbw_free #endif */ //#define BLIS_MALLOC_INTL hbw_malloc //#define BLIS_FREE_INTL hbw_free #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #define BLIS_SGEMM_UKERNEL bli_sgemm_opt_30x16_knc #define BLIS_DEFAULT_MC_S 240 #define BLIS_DEFAULT_KC_S 240 #define BLIS_DEFAULT_NC_S 14400 #define BLIS_DEFAULT_MR_S 30 #define BLIS_DEFAULT_NR_S 16 #define BLIS_PACKDIM_MR_S 32 #define BLIS_PACKDIM_NR_S 16 #if 0 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_30x8_knc #define BLIS_DEFAULT_MC_D 120 #define BLIS_DEFAULT_KC_D 240 #define BLIS_DEFAULT_NC_D 14400 #define BLIS_DEFAULT_MR_D 30 #define BLIS_DEFAULT_NR_D 8 #define BLIS_PACKDIM_MR_D 32 #define BLIS_PACKDIM_NR_D 8 #elif 0 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_30x8 #define BLIS_DEFAULT_MC_D 120 #define BLIS_DEFAULT_KC_D 240 #define BLIS_DEFAULT_NC_D 14400 #define BLIS_DEFAULT_MR_D 30 #define BLIS_DEFAULT_NR_D 8 #define BLIS_PACKDIM_MR_D 32 #define BLIS_PACKDIM_NR_D 8 #define BLIS_DPACKM_8XK_KERNEL bli_dpackm_8xk_opt #define BLIS_DPACKM_30XK_KERNEL bli_dpackm_30xk_opt #else #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_24x8 #define BLIS_DEFAULT_MR_D 24 #define BLIS_DEFAULT_NR_D 8 #define BLIS_PACKDIM_MR_D 24 #define BLIS_PACKDIM_NR_D 8 #define BLIS_DEFAULT_MC_D 120 #define BLIS_DEFAULT_KC_D 336 #define BLIS_DEFAULT_NC_D 14400 #define BLIS_DPACKM_8XK_KERNEL bli_dpackm_8xk_opt #define BLIS_DPACKM_24XK_KERNEL bli_dpackm_24xk_opt #endif #define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) #define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) #define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + 0) #define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) #define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) #define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + 0) #endif //#endif blis-1.1/config/knl/bli_kernel_defs_knl.h000066400000000000000000000035751474157777200205200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 24 #define BLIS_MR_d 24 #define BLIS_NR_s 16 #define BLIS_NR_d 8 //#endif blis-1.1/config/knl/make_defs.mk000066400000000000000000000100041474157777200166240ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := knl #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif ifeq ($(DEBUG_TYPE),sde) # Unconditionally disable use of libmemkind in Intel SDE. # Note: The BLIS_DISABLE_MEMKIND macro definition will override # (undefine) the BLIS_ENABLE_MEMKIND macro definition. CPPROCFLAGS += -DBLIS_DISABLE_MEMKIND # This value is normally set by configure and communicated to make via # config.mk, however, the make_defs.mk files (this file) get included # after config.mk, so this definition will override that earlier # definition. MK_ENABLE_MEMKIND := no endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xMIC-AVX512 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # The assembler on OS X won't recognize AVX512 without help. ifneq ($(CC_VENDOR),icc) ifeq ($(OS_NAME),Darwin) CKVECFLAGS += -Wa,-march=knl endif endif # Flags specific to reference kernels. # Note: We use AVX2 for reference kernels instead of AVX-512. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),icc) CRVECFLAGS := -xMIC-AVX512 else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/old/000077500000000000000000000000001474157777200143545ustar00rootroot00000000000000blis-1.1/config/old/armv7a/000077500000000000000000000000001474157777200155515ustar00rootroot00000000000000blis-1.1/config/old/armv7a/bli_cntx_init_armv7a.c000066400000000000000000000062311474157777200220210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_armv7a( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_armv7a_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_asm_4x4, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_asm_4x4, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_armv7a_asm_2x2, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_armv7a_asm_2x2, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 4, 4, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 432, 192, 64, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 352, 256, 128, 128 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-1.1/config/old/armv7a/bli_family_armv7a.h000066400000000000000000000057701474157777200213170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_FAMILY_H #define BLIS_FAMILY_H // -- ARCHITECTURE-SPECIFIC PROTOTYPES ----------------------------------------- // Define the current architecture's name. #define archname armv7a // Include the context initialization function API template. #include "bli_cntx_init_arch.h" #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_opt_4x4 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 432 #define BLIS_DEFAULT_KC_S 352 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 192 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_CGEMM_UKERNEL bli_cgemm_opt_4x4 #define BLIS_DEFAULT_MR_C 2 #define BLIS_DEFAULT_NR_C 2 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_ZGEMM_UKERNEL bli_zgemm_opt_4x4 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 2 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif #endif blis-1.1/config/old/armv7a/make_defs.mk000066400000000000000000000052131474157777200200210ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := armv7a #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 -mfloat-abi=hard CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpu=vfpv3 -marm -march=armv7-a else $(error gcc is required for this configuration.) endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/old/emscripten/000077500000000000000000000000001474157777200165255ustar00rootroot00000000000000blis-1.1/config/old/emscripten/bli_kernel.h000066400000000000000000000146161474157777200210140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H /* Use the same parameters as non-SIMD PNaCl */ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #define BLIS_DEFAULT_MC_S 252 #define BLIS_DEFAULT_KC_S 264 #define BLIS_DEFAULT_NC_S 8196 #define BLIS_DEFAULT_MC_D 1080 #define BLIS_DEFAULT_KC_D 120 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_DEFAULT_MC_C 120 #define BLIS_DEFAULT_KC_C 264 #define BLIS_DEFAULT_NC_C 4092 #define BLIS_DEFAULT_MC_Z 60 #define BLIS_DEFAULT_KC_Z 264 #define BLIS_DEFAULT_NC_Z 2040 // -- Register blocksizes -- #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 3 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 3 #define BLIS_DEFAULT_MR_C 2 #define BLIS_DEFAULT_NR_C 3 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 3 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. //#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. //#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) //#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) //#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + BLIS_DEFAULT_NC_S/4) //#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) //#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) //#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + BLIS_DEFAULT_NC_D/4) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) // -- Packing register blocksize (for packed micro-panels) -- // NOTE: These register blocksize "extensions" determine whether the // leading dimensions used within the packed micro-panels are equal to // or greater than their corresponding register blocksizes above. //#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + ...) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) //#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + ...) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) //#define BLIS_PACKDIM_MR_C (BLIS_DEFAULT_MR_C + ...) //#define BLIS_PACKDIM_NR_C (BLIS_DEFAULT_NR_C + ...) //#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...) //#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...) // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-3 KERNEL DEFINITIONS ----------------------------------------------- // -- gemm -- // -- trsm-related -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- // -- dotxf -- // -- dotxaxpyf -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- addv -- // -- axpyv -- // -- copyv -- // -- dotv -- // -- dotxv -- // -- invertv -- // -- scal2v -- // -- scalv -- // -- setv -- // -- subv -- // -- swapv -- #endif blis-1.1/config/old/emscripten/make_defs.mk000066400000000000000000000053051474157777200207770ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once. ifndef MAKE_DEFS_MK_INCLUDED MAKE_DEFS_MK_INCLUDED := yes # # --- Development tools definitions -------------------------------------------- # # --- Determine the C compiler and related flags --- CC := emcc CC_VENDOR := emcc # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 CPICFLAGS := -fPIC CDBGFLAGS := #-g4 CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors COPTFLAGS := -O2 CKOPTFLAGS := -O3 CKVECFLAGS := # --- Determine the archiver and related flags --- AR := emar RANLIB := emranlib ARFLAGS := cr # --- Determine the linker and related flags --- LINKER := $(CC) SOFLAGS := -shared LDFLAGS := -O3 -s TOTAL_MEMORY=67108864 -s FORCE_ALIGNED_MEMORY=1 -s PRECISE_F32=2 -s GC_SUPPORT=0 # --- Determine JS interpreter --- JSINT := node # end of ifndef MAKE_DEFS_MK_INCLUDED conditional block endif blis-1.1/config/old/haswellbb/000077500000000000000000000000001474157777200163175ustar00rootroot00000000000000blis-1.1/config/old/haswellbb/bli_cntx_init_haswell.c000066400000000000000000000256671474157777200230470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Instantiate prototypes for packm kernels. PACKM_KER_PROT( float, s, packm_6xk_bb4_haswell_ref ) PACKM_KER_PROT( double, d, packm_6xk_bb2_haswell_ref ) // Instantiate prototypes for level-3 kernels. GEMM_UKR_PROT( float, s, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( float, s, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( float, s, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( float, s, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( float, s, trsmbb_u_haswell_ref ) GEMM_UKR_PROT( double, d, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( double, d, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( double, d, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( double, d, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( double, d, trsmbb_u_haswell_ref ) GEMM_UKR_PROT( scomplex, c, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( scomplex, c, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( scomplex, c, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( scomplex, c, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( scomplex, c, trsmbb_u_haswell_ref ) GEMM_UKR_PROT( dcomplex, z, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( dcomplex, z, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( dcomplex, z, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( dcomplex, z, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( dcomplex, z, trsmbb_u_haswell_ref ) void bli_cntx_init_haswell( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; blksz_t thresh[ BLIS_NUM_THRESH ]; // Set default kernel blocksizes and functions. bli_cntx_init_haswell_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( #if 0 8, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE, #else 12, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_FLOAT, bli_strsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_FLOAT, bli_strsmbb_u_haswell_ref, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_DOUBLE, bli_dtrsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_DOUBLE, bli_dtrsmbb_u_haswell_ref, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_SCOMPLEX, bli_ctrsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_SCOMPLEX, bli_ctrsmbb_u_haswell_ref, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_DCOMPLEX, bli_ztrsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_DCOMPLEX, bli_ztrsmbb_u_haswell_ref, FALSE, #endif cntx ); // Update the context with customized virtual [gemm]trsm micro-kernels. bli_cntx_set_l3_vir_ukrs ( 8, BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsmbb_u_haswell_ref, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsmbb_u_haswell_ref, BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsmbb_u_haswell_ref, BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsmbb_u_haswell_ref, cntx ); // Update the context with optimized packm kernels. bli_cntx_set_packm_kers ( 2, BLIS_PACKM_6XK_KER, BLIS_FLOAT, bli_spackm_6xk_bb4_haswell_ref, BLIS_PACKM_6XK_KER, BLIS_DOUBLE, bli_dpackm_6xk_bb2_haswell_ref, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 4, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, cntx ); // Update the context with optimized level-1v kernels. bli_cntx_set_l1v_kers ( 10, #if 1 // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, #endif // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z #if 0 bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, 75, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MR ], 24, 12, 12, 6 ); bli_blksz_init ( &blkszs[ BLIS_NR ], 6, 6, 6, 6, 24, 12, 6, 6 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 2076 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( 7, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); // ------------------------------------------------------------------------- // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &thresh[ BLIS_MT ], -1, 1, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_NT ], -1, 1, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_KT ], -1, 1, -1, -1 ); // Initialize the context with the sup thresholds. bli_cntx_set_l3_sup_thresh ( 3, BLIS_MT, &thresh[ BLIS_MT ], BLIS_NT, &thresh[ BLIS_NT ], BLIS_KT, &thresh[ BLIS_KT ], cntx ); // Update the context with optimized small/unpacked gemm kernels. bli_cntx_set_l3_sup_kers ( 8, //BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref, BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE, BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, cntx ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, -1, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes for small/unpacked level-3 problems. bli_cntx_set_l3_sup_blkszs ( 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_KC, &blkszs[ BLIS_KC ], BLIS_MC, &blkszs[ BLIS_MC ], BLIS_NR, &blkszs[ BLIS_NR ], BLIS_MR, &blkszs[ BLIS_MR ], cntx ); } blis-1.1/config/old/haswellbb/bli_family_haswell.h000066400000000000000000000122601474157777200223170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 #define BLIS_POOL_ADDR_OFFSET_SIZE_A 32 #define BLIS_POOL_ADDR_OFFSET_SIZE_B 64 // Disable right-side hemm, symm, and trmm[3] to accommodate the broadcasting of // elements within the packed matrix B. #define BLIS_DISABLE_HEMM_RIGHT #define BLIS_DISABLE_SYMM_RIGHT #define BLIS_DISABLE_TRMM_RIGHT #define BLIS_DISABLE_TRMM3_RIGHT #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- // -- sgemm micro-kernel -- #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_4x24 #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 24 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 1 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 6 #define BLIS_DEFAULT_NR_S 16 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 6 #endif // -- dgemm micro-kernel -- #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x12 #define BLIS_DEFAULT_MC_D 152 #define BLIS_DEFAULT_KC_D 160 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 12 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 1 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 6 #endif // -- cgemm micro-kernel -- #if 1 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 3 #define BLIS_DEFAULT_NR_C 8 #define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x3 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 3 #endif // -- zgemm micro-kernel -- #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x3 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 3 #endif #endif //#endif blis-1.1/config/old/haswellbb/make_defs.mk000066400000000000000000000064161474157777200205750ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := haswell #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell ifeq ($(GCC_OT_4_9_0),yes) # If gcc is older than 4.9.0, we must use a different label for -march. CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 endif else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xCORE-AVX2 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/old/loongson3a/000077500000000000000000000000001474157777200164365ustar00rootroot00000000000000blis-1.1/config/old/loongson3a/bli_kernel.h000066400000000000000000000146231474157777200207230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 8192 #define BLIS_DEFAULT_MC_D 32 #define BLIS_DEFAULT_KC_D 128 #define BLIS_DEFAULT_NC_D 1024 #define BLIS_DEFAULT_MC_C 128 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 2048 // -- Register blocksizes -- #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. //#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. //#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) //#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) //#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + BLIS_DEFAULT_NC_S/4) //#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) //#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) //#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + BLIS_DEFAULT_NC_D/4) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) // -- Packing register blocksize (for packed micro-panels) -- // NOTE: These register blocksize "extensions" determine whether the // leading dimensions used within the packed micro-panels are equal to // or greater than their corresponding register blocksizes above. //#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + ...) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) //#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + ...) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) //#define BLIS_PACKDIM_MR_C (BLIS_DEFAULT_MR_C + ...) //#define BLIS_PACKDIM_NR_C (BLIS_DEFAULT_NR_C + ...) //#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...) //#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...) // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-3 KERNEL DEFINITIONS ----------------------------------------------- // -- gemm -- #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4 // -- trsm-related -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- // -- dotxf -- // -- dotxaxpyf -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- addv -- // -- axpyv -- // -- copyv -- // -- dotv -- // -- dotxv -- // -- invertv -- // -- scal2v -- // -- scalv -- // -- setv -- // -- subv -- // -- swapv -- #endif blis-1.1/config/old/loongson3a/make_defs.mk000066400000000000000000000052121474157777200207050ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := loongson3a #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L -mabi=64 CMISCFLAGS := -std=c99 CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -mtune=loongson3a endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=loongson3a else $(error gcc is required for this configuration.) endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/old/newarch/000077500000000000000000000000001474157777200160035ustar00rootroot00000000000000blis-1.1/config/old/newarch/bli_kernel.h000066400000000000000000000033001474157777200202560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H #endif blis-1.1/config/old/newarch/make_defs.mk000066400000000000000000000053171474157777200202600ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := newarch #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/old/pnacl/000077500000000000000000000000001474157777200154515ustar00rootroot00000000000000blis-1.1/config/old/pnacl/bli_kernel.h000066400000000000000000000172151474157777200177360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H /* * SIMD-enabled (SP only) PNaCl shipped in Chrome 36 and it is not backward-compatible. * Therefore, if compilation targets an older Chrome release, we use scalar kernels. * The target Chrome version is indicated by PPAPI_MACRO defined in the header below. */ #include // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 8192 #else #define BLIS_DEFAULT_MC_S 252 #define BLIS_DEFAULT_KC_S 264 #define BLIS_DEFAULT_NC_S 8196 #endif #define BLIS_DEFAULT_MC_D 1080 #define BLIS_DEFAULT_KC_D 120 #define BLIS_DEFAULT_NC_D 8400 #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MC_C 128 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #else #define BLIS_DEFAULT_MC_C 120 #define BLIS_DEFAULT_KC_C 264 #define BLIS_DEFAULT_NC_C 4092 #endif #define BLIS_DEFAULT_MC_Z 60 #define BLIS_DEFAULT_KC_Z 264 #define BLIS_DEFAULT_NC_Z 2040 // -- Register blocksizes -- #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #else #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 3 #endif #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 3 #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 4 #else #define BLIS_DEFAULT_MR_C 2 #define BLIS_DEFAULT_NR_C 3 #endif #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 3 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. //#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. //#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) //#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) //#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + BLIS_DEFAULT_NC_S/4) //#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) //#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) //#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + BLIS_DEFAULT_NC_D/4) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) // -- Packing register blocksize (for packed micro-panels) -- // NOTE: These register blocksize "extensions" determine whether the // leading dimensions used within the packed micro-panels are equal to // or greater than their corresponding register blocksizes above. //#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + ...) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) //#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + ...) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) //#define BLIS_PACKDIM_MR_C (BLIS_DEFAULT_MR_C + ...) //#define BLIS_PACKDIM_NR_C (BLIS_DEFAULT_NR_C + ...) //#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...) //#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...) // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-3 KERNEL DEFINITIONS ----------------------------------------------- // -- gemm -- #if PPAPI_RELEASE >= 36 #define BLIS_SGEMM_UKERNEL bli_sgemm_opt #define BLIS_CGEMM_UKERNEL bli_cgemm_opt #endif // -- trsm-related -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- // -- dotxf -- // -- dotxaxpyf -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- addv -- // -- axpyv -- #if PPAPI_RELEASE >= 36 #define BLIS_SAXPYV_KERNEL bli_saxpyv_opt #define BLIS_CAXPYV_KERNEL bli_caxpyv_opt #endif // -- copyv -- // -- dotv -- #define BLIS_SDOTV_KERNEL bli_sdotv_opt #define BLIS_DDOTV_KERNEL bli_ddotv_opt #define BLIS_CDOTV_KERNEL bli_cdotv_opt #define BLIS_ZDOTV_KERNEL bli_zdotv_opt // -- dotxv -- // -- invertv -- // -- scal2v -- // -- scalv -- // -- setv -- // -- subv -- // -- swapv -- #endif blis-1.1/config/old/pnacl/make_defs.mk000066400000000000000000000056461474157777200177330ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once. ifndef MAKE_DEFS_MK_INCLUDED MAKE_DEFS_MK_INCLUDED := yes # # --- Development tools definitions -------------------------------------------- # # --- Determine the C compiler and related flags --- CC := pnacl-clang CC_VENDOR := pnacl-clang # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=gnu11 -I$(NACL_SDK_ROOT)/include CPICFLAGS := -fPIC CDBGFLAGS := -g CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors COPTFLAGS := -O3 CKOPTFLAGS := $(COPTFLAGS) -ffast-math CKVECFLAGS := # --- Determine the archiver and related flags --- AR := pnacl-ar ARFLAGS := rcs # --- Determine the linker and related flags --- LINKER := $(CC) SOFLAGS := ifneq ($(CC_VENDOR),icc) LDFLAGS := -lm endif # --- Determine the finalizer and related flags --- FINALIZER := pnacl-finalize FINFLAGS := # --- Determine the translator and related flags --- TRANSLATOR := pnacl-translate TRNSFLAGS := -O3 TRNSAMD64FLAGS := -arch x86-64 TRNSX86FLAGS := -arch i686 TRNSARMFLAGS := -arch armv7 # end of ifndef MAKE_DEFS_MK_INCLUDED conditional block endif blis-1.1/config/penryn/000077500000000000000000000000001474157777200151115ustar00rootroot00000000000000blis-1.1/config/penryn/bli_cntx_init_penryn.c000066400000000000000000000072751474157777200215100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_penryn( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_penryn_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, //level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_penryn_asm_8x4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_penryn_asm_4x4, //BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_penryn_asm_8x4, //BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_penryn_asm_4x4, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_penryn_asm_4x4, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_penryn_asm_4x4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, //level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, //BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, //BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 768, 384, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 384, 384, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-1 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/penryn/bli_family_penryn.h000066400000000000000000000061321474157777200207660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_8x4 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 768 #define BLIS_DEFAULT_KC_S 384 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 384 #define BLIS_DEFAULT_KC_D 384 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DGEMMTRSM_L_UKERNEL bli_dgemmtrsm_l_asm_4x4 #define BLIS_DGEMMTRSM_U_UKERNEL bli_dgemmtrsm_u_asm_4x4 // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- #define BLIS_DAXPY2V_KERNEL bli_daxpy2v_int_var1 #define BLIS_DDOTAXPYV_KERNEL bli_ddotaxpyv_int_var1 #define BLIS_DAXPYF_KERNEL bli_daxpyf_int_var1 #define BLIS_DDOTXF_KERNEL bli_ddotxf_int_var1 #define BLIS_DDOTXAXPYF_KERNEL bli_ddotxaxpyf_int_var1 // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- #define BLIS_DAXPYV_KERNEL bli_daxpyv_opt_var1 #define BLIS_DDOTV_KERNEL bli_ddotv_opt_var1 #endif //#endif blis-1.1/config/penryn/bli_kernel_defs_penryn.h000066400000000000000000000035721474157777200217730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 4 #define BLIS_NR_s 4 #define BLIS_NR_d 4 //#endif blis-1.1/config/penryn/make_defs.mk000066400000000000000000000061311474157777200173610ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := penryn #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xSSSE3 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/piledriver/000077500000000000000000000000001474157777200157435ustar00rootroot00000000000000blis-1.1/config/piledriver/bli_cntx_init_piledriver.c000066400000000000000000000067441474157777200231740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_piledriver( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_piledriver_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_piledriver_asm_16x3, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_piledriver_asm_8x3, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_piledriver_asm_4x2, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_piledriver_asm_2x2, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 4, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 3, 3, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 2016, 1008, 512, 400 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 128, 128, 256, 160 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8400, 8400, 8400, 8400 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/piledriver/bli_family_piledriver.h000066400000000000000000000057171474157777200224620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x3 #define BLIS_DEFAULT_MC_S 2016 #define BLIS_DEFAULT_KC_S 128 #define BLIS_DEFAULT_NC_S 8400 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 3 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x3 #define BLIS_DEFAULT_MC_D 1008 #define BLIS_DEFAULT_KC_D 128 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 3 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_4x2 #define BLIS_DEFAULT_MC_C 512 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 8400 #define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 2 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_2x2 #define BLIS_DEFAULT_MC_Z 400 #define BLIS_DEFAULT_KC_Z 160 #define BLIS_DEFAULT_NC_Z 8400 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 2 #endif //#endif blis-1.1/config/piledriver/bli_kernel_defs_piledriver.h000066400000000000000000000037231474157777200234550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 16 #define BLIS_MR_d 8 #define BLIS_MR_c 4 #define BLIS_MR_z 2 #define BLIS_NR_s 3 #define BLIS_NR_d 3 #define BLIS_NR_c 2 #define BLIS_NR_z 2 //#endif blis-1.1/config/piledriver/make_defs.mk000066400000000000000000000061571474157777200202230ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := piledriver #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/power/000077500000000000000000000000001474157777200147325ustar00rootroot00000000000000blis-1.1/config/power/bli_family_power.h000066400000000000000000000033061474157777200204300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-1.1/config/power/make_defs.mk000066400000000000000000000054711474157777200172100ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := power #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 CKVECFLAGS := # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/power10/000077500000000000000000000000001474157777200150735ustar00rootroot00000000000000blis-1.1/config/power10/bli_cntx_init_power10.c000066400000000000000000000062531474157777200214470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_power10( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_power10_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_power10_mma_8x16, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_power10_mma_8x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_VA_END ); // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 832, 320, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1026, 960, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/power10/bli_family_power10.h000066400000000000000000000034561474157777200207400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 #define BLIS_POOL_ADDR_OFFSET_SIZE_A 192 #define BLIS_POOL_ADDR_OFFSET_SIZE_B 152 blis-1.1/config/power10/bli_kernel_defs_power10.h000066400000000000000000000035741474157777200217410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 8 #define BLIS_NR_s 16 #define BLIS_NR_d 8 //#endif blis-1.1/config/power10/make_defs.mk000066400000000000000000000054761474157777200173560ustar00rootroot00000000000000 # # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := power10 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=power10 -mtune=power10 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mcpu=power10 -mtune=power10 else $(info $(CC_VENDOR)) $(error gcc, clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) CRVECFLAGS := $(CKVECFLAGS) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/power7/000077500000000000000000000000001474157777200150215ustar00rootroot00000000000000blis-1.1/config/power7/bli_cntx_init_power7.c000066400000000000000000000062031474157777200213160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_power7( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_power7_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_power7_int_8x4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 4, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 64, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4096, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/power7/bli_family_power7.h000066400000000000000000000040621474157777200206060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_8x4 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 64 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4096 #endif //#endif blis-1.1/config/power7/bli_kernel_defs_power7.h000066400000000000000000000035161474157777200216110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_d 8 #define BLIS_NR_d 4 //#endif blis-1.1/config/power7/make_defs.mk000066400000000000000000000056631474157777200173020ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := power7 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mcpu=power7 CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -mtune=power7 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mvsx else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/power9/000077500000000000000000000000001474157777200150235ustar00rootroot00000000000000blis-1.1/config/power9/bli_cntx_init_power9.c000066400000000000000000000071571474157777200213330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_power9( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_power9_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_power9_asm_12x6, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_TRSM_L_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_TRSM_U_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_TRSM_L_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_TRSM_U_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_TRSM_L_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_TRSM_U_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_TRSM_L_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_TRSM_U_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, 12, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_NR ], -1, 6, -1, -1, -1, 12, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 576, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 1408, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 8190, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/power9/bli_family_power9.h000066400000000000000000000040511474157777200206100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 #define BLIS_POOL_ADDR_OFFSET_SIZE_A 192 #define BLIS_POOL_ADDR_OFFSET_SIZE_B 152 // Disable right-side hemm, symm, and trmm[3] to accommodate the broadcasting of // elements within the packed matrix B. #define BLIS_DISABLE_HEMM_RIGHT #define BLIS_DISABLE_SYMM_RIGHT #define BLIS_DISABLE_TRMM_RIGHT #define BLIS_DISABLE_TRMM3_RIGHT blis-1.1/config/power9/bli_kernel_defs_power9.h000066400000000000000000000035761474157777200216230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_d 12 #define BLIS_NR_d 6 #define BLIS_BBN_s 4 #define BLIS_BBN_d 2 //#endif blis-1.1/config/power9/make_defs.mk000066400000000000000000000055021474157777200172740ustar00rootroot00000000000000 # # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := power9 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=power9 -mtune=power9 -DXLC=0 else ifeq ($(CC_VENDOR),IBM) CKVECFLAGS := -qarch=pwr9 -qtune=pwr9 -DXLC=1 else $(info $(CC_VENDOR)) $(error gcc/xlc is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) CRVECFLAGS := $(CKVECFLAGS) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/rv32i/000077500000000000000000000000001474157777200145435ustar00rootroot00000000000000blis-1.1/config/rv32i/bli_cntx_init_rv32i.c000066400000000000000000000035541474157777200205700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_rv32i( cntx_t* cntx ) { // Set default kernel blocksizes and functions. bli_cntx_init_rv32i_ref( cntx ); // ------------------------------------------------------------------------- } blis-1.1/config/rv32i/bli_kernel_defs_rv32i.h000066400000000000000000000035011474157777200210470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- // Fall through to generic sizes //#endif blis-1.1/config/rv32i/make_defs.mk000066400000000000000000000070771474157777200170250ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := rv32i #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -DRISCV_SIZE=32 RISCV_ARCH := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') RISCV_ABI := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') ifeq (,$(findstring 32,$(RISCV_ARCH))) $(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) else ifeq (,$(findstring 32,$(RISCV_ABI))) $(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) endif CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors # In case the A extension is not available LDFLAGS += -latomic ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/rv32iv/000077500000000000000000000000001474157777200147315ustar00rootroot00000000000000blis-1.1/config/rv32iv/bli_cntx_init_rv32iv.c000066400000000000000000000076031474157777200211430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "../../kernels/rviv/3/bli_rviv_utils.h" void bli_cntx_init_rv32iv( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_rv32iv_ref( cntx ); // ------------------------------------------------------------------------- // A reasonable assumptions for application cores is VLEN >= 128 bits, i.e., // v >= 4. Embedded cores, however, may implement the minimal configuration, // which allows VLEN = 32 bits. Here, we assume VLEN >= 128 and otherwise // fall back to the reference kernels. const uint32_t v = get_vlenb() / sizeof(float); if ( v >= 4 ) { const uint32_t mr_s = 4 * v; const uint32_t mr_d = 2 * v; const uint32_t mr_c = 2 * v; const uint32_t mr_z = v; // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_rviv_4vx4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_rviv_4vx4, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_rviv_4vx4, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_rviv_4vx4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], mr_s, mr_d, mr_c, mr_z ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 20*mr_s, 20*mr_d, 60*mr_c, 30*mr_z ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 320, 320, 160 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, 3072, 3072 ); bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } } blis-1.1/config/rv32iv/bli_kernel_defs_rv32iv.h000066400000000000000000000034411474157777200214260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- //#endif blis-1.1/config/rv32iv/make_defs.mk000066400000000000000000000073221474157777200172040ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := rv32iv #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -DRISCV_SIZE=32 RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') ifeq (,$(findstring 32,$(RISCV_ARCH))) $(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) else ifeq (,$(findstring 32,$(RISCV_ABI))) $(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) endif CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors # In case the A extension is not available LDFLAGS += -latomic ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O0 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) # Lower compiler optimization to -O1. At -O3, gcc version 12.0.1 20220505 # computes offsets for the matrix ab in the ref gemm kernel incorrectly. CRVECFLAGS := $(CKVECFLAGS) -O1 else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/rv64i/000077500000000000000000000000001474157777200145505ustar00rootroot00000000000000blis-1.1/config/rv64i/bli_cntx_init_rv64i.c000066400000000000000000000035541474157777200206020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_rv64i( cntx_t* cntx ) { // Set default kernel blocksizes and functions. bli_cntx_init_rv64i_ref( cntx ); // ------------------------------------------------------------------------- } blis-1.1/config/rv64i/bli_kernel_defs_rv64i.h000066400000000000000000000035011474157777200210610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- // Fall through to generic sizes //#endif blis-1.1/config/rv64i/make_defs.mk000066400000000000000000000070771474157777200170320ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := rv64i #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -DRISCV_SIZE=64 RISCV_ARCH := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') RISCV_ABI := $(shell $(CC) -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') ifeq (,$(findstring 64,$(RISCV_ARCH))) $(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) else ifeq (,$(findstring 64,$(RISCV_ABI))) $(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) endif CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors # In case the A extension is not available LDFLAGS += -latomic ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/rv64iv/000077500000000000000000000000001474157777200147365ustar00rootroot00000000000000blis-1.1/config/rv64iv/bli_cntx_init_rv64iv.c000066400000000000000000000101621474157777200211470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "../../kernels/rviv/3/bli_rviv_utils.h" void bli_cntx_init_rv64iv( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_rv64iv_ref( cntx ); // ------------------------------------------------------------------------- // A reasonable assumptions for application cores is VLEN >= 128 bits, i.e., // v >= 4. Embedded cores, however, may implement the minimal configuration, // which allows VLEN = 32 bits. Here, we assume VLEN >= 128 and otherwise // fall back to the reference kernels. const uint32_t v = get_vlenb() / sizeof(float); if ( v >= 4 ) { const uint32_t mr_s = 4 * v; const uint32_t mr_d = 2 * v; const uint32_t mr_c = 2 * v; const uint32_t mr_z = v; // TODO: Register different kernels based on the value // of v to avoid MC becoming too big. (e.g. 2vx8) // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_rviv_4vx4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_rviv_4vx4, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_rviv_4vx4, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_rviv_4vx4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], mr_s, mr_d, mr_c, mr_z ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 20*mr_s, 20*mr_d, 60*mr_c, 30*mr_z ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 320, 320, 160 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, 3072, 3072 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } } blis-1.1/config/rv64iv/bli_kernel_defs_rv64iv.h000066400000000000000000000034401474157777200214370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- //#endif blis-1.1/config/rv64iv/make_defs.mk000066400000000000000000000072061474157777200172120ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := rv64iv #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -DRISCV_SIZE=64 RISCV_ARCH := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_arch.h | grep '^[^\#]') RISCV_ABI := $(shell $(CC) -DFORCE_RISCV_VECTOR -E build/detect/riscv/bli_riscv_detect_abi.h | grep '^[^\#]') ifeq (,$(findstring 64,$(RISCV_ARCH))) $(error The RISC-V compiler architecture $(RISCV_ARCH) is not compatible with $(THIS_CONFIG)) else ifeq (,$(findstring 64,$(RISCV_ABI))) $(error The RISC-V compiler ABI $(RISCV_ABI) is not compatible with $(THIS_CONFIG)) endif CMISCFLAGS := -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors # In case the A extension is not available LDFLAGS += -latomic ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -ftree-vectorize endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) # Lower compiler optimization. cinvscalv fails at -O1 CRVECFLAGS := $(CKVECFLAGS) -O0 else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/sandybridge/000077500000000000000000000000001474157777200160715ustar00rootroot00000000000000blis-1.1/config/sandybridge/bli_cntx_init_sandybridge.c000066400000000000000000000067511474157777200234460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_sandybridge( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_sandybridge_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sandybridge_asm_8x8, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sandybridge_asm_8x4, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sandybridge_asm_8x4, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sandybridge_asm_4x4, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 8, 4, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 128, 96, 96, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 384, 256, 256, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/sandybridge/bli_family_sandybridge.h000066400000000000000000000053741474157777200227350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_8x8 #define BLIS_DEFAULT_MC_S 128 #define BLIS_DEFAULT_KC_S 384 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 8 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x4 #define BLIS_DEFAULT_MC_D 96 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 4 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x4 #define BLIS_DEFAULT_MC_C 96 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 192 #define BLIS_DEFAULT_NC_Z 4096 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 #endif //#endif blis-1.1/config/sandybridge/bli_kernel_defs_sandybridge.h000066400000000000000000000037221474157777200237300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 8 #define BLIS_MR_c 8 #define BLIS_MR_z 4 #define BLIS_NR_s 8 #define BLIS_NR_d 4 #define BLIS_NR_c 4 #define BLIS_NR_z 4 //#endif blis-1.1/config/sandybridge/make_defs.mk000066400000000000000000000064041474157777200203440ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := sandybridge #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge ifeq ($(GCC_OT_4_9_0),yes) # If gcc is older than 4.9.0, we must use a different label for -march. CKVECFLAGS := -mavx -mfpmath=sse -march=corei7-avx endif else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xAVX else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/sifive_x280/000077500000000000000000000000001474157777200156445ustar00rootroot00000000000000blis-1.1/config/sifive_x280/bli_cntx_init_sifive_x280.c000066400000000000000000000246031474157777200227700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_sifive_x280( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_sifive_x280_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native kernels. bli_cntx_set_ukrs ( cntx, // Level 1 BLIS_ADDV_KER, BLIS_FLOAT, bli_saddv_sifive_x280_intr, BLIS_ADDV_KER, BLIS_DOUBLE, bli_daddv_sifive_x280_intr, BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_x280_intr, BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_x280_intr, BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_asm, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_asm, BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_asm, BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_asm, BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_x280_intr, BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_x280_intr, BLIS_AXPBYV_KER, BLIS_SCOMPLEX, bli_caxpbyv_sifive_x280_intr, BLIS_AXPBYV_KER, BLIS_DCOMPLEX, bli_zaxpbyv_sifive_x280_intr, BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_sifive_x280_intr, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_sifive_x280_intr, BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_x280_intr, BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_x280_intr, BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_asm, BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_asm, BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_asm, BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_asm, BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_x280_intr, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_x280_intr, BLIS_DOTV_KER, BLIS_SCOMPLEX, bli_cdotv_sifive_x280_intr, BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_sifive_x280_intr, BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_sifive_x280_intr, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_sifive_x280_intr, BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_x280_intr, BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_x280_intr, BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_asm, BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_asm, BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_asm, BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_asm, BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_asm, BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_asm, BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_asm, BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_asm, BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_x280_intr, BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_x280_intr, BLIS_SCAL2V_KER, BLIS_SCOMPLEX, bli_cscal2v_sifive_x280_intr, BLIS_SCAL2V_KER, BLIS_DCOMPLEX, bli_zscal2v_sifive_x280_intr, BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_sifive_x280_intr, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_sifive_x280_intr, BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_x280_intr, BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_x280_intr, BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_asm, BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_asm, BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_asm, BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_asm, BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_x280_intr, BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_x280_intr, BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_x280_intr, BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_x280_intr, BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_asm, BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_asm, BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_asm, BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_asm, BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_x280_intr, BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_x280_intr, BLIS_XPBYV_KER, BLIS_SCOMPLEX, bli_cxpbyv_sifive_x280_intr, BLIS_XPBYV_KER, BLIS_DCOMPLEX, bli_zxpbyv_sifive_x280_intr, // Level 1f BLIS_AXPY2V_KER, BLIS_FLOAT, bli_saxpy2v_sifive_x280_intr, BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_sifive_x280_intr, BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_x280_intr, BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_x280_intr, BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_asm, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_asm, BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_asm, BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_asm, BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_asm, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_asm, BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_asm, BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_asm, BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_x280_intr, BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_x280_intr, BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_x280_intr, BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_x280_intr, BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_asm, BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_asm, BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_asm, BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_asm, // Level 1m BLIS_PACKM_MRXK_KER, BLIS_FLOAT, bli_spackm_sifive_x280_asm_7xk, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_asm_7xk, BLIS_PACKM_MRXK_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_asm_6xk, BLIS_PACKM_MRXK_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_FLOAT, bli_spackm_sifive_x280_asm_64xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_asm_32xk, BLIS_PACKM_NRXK_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_asm_32xk, BLIS_PACKM_NRXK_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_asm_16xk, // Level 3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_asm_7m4, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_asm_7m4, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_asm_6m2, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_asm_6m2, BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_asm, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_asm, BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_asm, BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_asm, BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_asm, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_asm, BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_asm, BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_asm, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR ], 7, 7, 6, 6, 8, 8, 8, 8 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 32, 32, 16 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 28, 28, 24, 24 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 1024, 1024, 1024, 1024 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 128, 256, 128 ); // Default BLIS_BBM_s = 1, but set here to ensure it's correct bli_blksz_init_easy( &blkszs[ BLIS_BBM ], 1, 1, 1, 1 ); bli_blksz_init_easy( &blkszs[ BLIS_BBN ], 1, 1, 1, 1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1m BLIS_BBM, &blkszs[ BLIS_BBM ], BLIS_BBM, BLIS_BBN, &blkszs[ BLIS_BBN ], BLIS_BBN, BLIS_VA_END ); } blis-1.1/config/sifive_x280/bli_family_sifive_x280.h000066400000000000000000000031641474157777200222560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-1.1/config/sifive_x280/bli_kernel_defs_sifive_x280.h000066400000000000000000000040501474157777200232510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2023, SiFive, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 7 #define BLIS_MR_d 7 #define BLIS_MR_c 6 #define BLIS_MR_z 6 #define BLIS_PACKMR_s 8 #define BLIS_PACKMR_d 8 #define BLIS_PACKMR_c 8 #define BLIS_PACKMR_z 8 #define BLIS_NR_s 64 #define BLIS_NR_d 32 #define BLIS_NR_c 32 #define BLIS_NR_z 16 //#endif blis-1.1/config/sifive_x280/make_defs.mk000066400000000000000000000056121474157777200201170ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2023, SiFive, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := sifive_x280 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CMISCFLAGS_SIFIVE := -mcmodel=medany -march=rv64gcv_zba_zbb_zvl512b -mabi=lp64d CPPROCFLAGS := CMISCFLAGS := $(CMISCFLAGS_SIFIVE) -fdata-sections -ffunction-sections \ -fdiagnostics-color=always -fno-rtti -fno-exceptions CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wextra -Wno-unused-function -Wno-unused-parameter \ -Wno-sign-compare -Wno-unused-variable ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -Ofast endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) CKVECFLAGS := # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) CRVECFLAGS := $(CKVECFLAGS) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/skx/000077500000000000000000000000001474157777200144035ustar00rootroot00000000000000blis-1.1/config/skx/bli_cntx_init_skx.c000066400000000000000000000112171474157777200202630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_skx( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_skx_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT , bli_sgemm_skx_asm_32x12_l2, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_skx_asm_16x14, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, #if 1 // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, #endif // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT , FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 16, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 14, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 480, 240, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_KC ], 384, 256, -1, -1, 480, 320, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3752, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, BLIS_VA_END ); } blis-1.1/config/skx/bli_family_skx.h000066400000000000000000000113171474157777200175530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- THREADING PARAMETERS ----------------------------------------------------- #define BLIS_THREAD_RATIO_M 3 #define BLIS_THREAD_RATIO_N 2 #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 4 // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 64 #define BLIS_SIMD_MAX_SIZE 64 #define BLIS_SIMD_MAX_NUM_REGISTERS 32 //#include //#define BLIS_MALLOC_POOL malloc //#define BLIS_FREE_POOL free #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- // -- Cache and register blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_16x12_l2 #define BLIS_DEFAULT_MC_D 144 #define BLIS_DEFAULT_KC_D 336 #define BLIS_DEFAULT_NC_D 5760 #define BLIS_DEFAULT_MR_D 16 #define BLIS_DEFAULT_NR_D 12 #define BLIS_PACKDIM_MR_D 16 #define BLIS_PACKDIM_NR_D 12 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. //#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. #define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) #define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) #define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + 0) #define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) #define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) #define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + 0) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) #endif //#endif blis-1.1/config/skx/bli_kernel_defs_skx.h000066400000000000000000000035761474157777200205630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 32 #define BLIS_MR_d 16 #define BLIS_NR_s 12 #define BLIS_NR_d 14 //#endif blis-1.1/config/skx/make_defs.mk000066400000000000000000000113041474157777200166510ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := skx #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. # NOTE: The -fomit-frame-pointer option is needed for some kernels because # they make explicit use of the rbp register. CKOPTFLAGS := $(COPTFLAGS) -O3 -fomit-frame-pointer ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xCORE-AVX512 else ifeq ($(CC_VENDOR),clang) # NOTE: We have to use -march=haswell on Windows because apparently AVX512 # uses an alternate calling convention where xmm registers are not callee-saved # on the stack. When this is mixed with framework code compiled for general # x86_64 mode then chaos ensues (e.g. #514). ifeq ($(IS_WIN),yes) CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=haswell else CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 endif else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # The assembler on OS X won't recognize AVX512 without help ifneq ($(CC_VENDOR),icc) ifeq ($(OS_NAME),Darwin) CKVECFLAGS += -Wa,-march=skylake-avx512 endif endif # Flags specific to reference kernels. # Note: We use AVX2 for reference kernels because, as Jeff Hammond says, # reference kernel code "is not going to achieve high enough SIMD utilization # to overcome the AVX-512 frequency drop". (Issue #187) CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),icc) CRVECFLAGS := -xCORE-AVX2 else ifeq ($(CC_VENDOR),clang) # NOTE: We have to use -march=haswell on Windows because apparently AVX512 # uses an alternate calling convention where xmm registers are not callee-saved # on the stack. When this is mixed with framework code compiled for general # x86_64 mode then chaos ensues (e.g. #514). ifeq ($(IS_WIN),yes) CRVECFLAGS := -march=haswell -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast endif else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/steamroller/000077500000000000000000000000001474157777200161275ustar00rootroot00000000000000blis-1.1/config/steamroller/bli_cntx_init_steamroller.c000066400000000000000000000067461474157777200235460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_steamroller( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_steamroller_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_piledriver_asm_16x3, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_piledriver_asm_8x3, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_piledriver_asm_4x2, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_piledriver_asm_2x2, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 4, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 3, 3, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 2016, 1008, 512, 400 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 128, 128, 256, 160 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8400, 8400, 8400, 8400 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/steamroller/bli_family_steamroller.h000066400000000000000000000035051474157777200230230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 //#endif blis-1.1/config/steamroller/bli_kernel_defs_steamroller.h000066400000000000000000000037231474157777200240250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 16 #define BLIS_MR_d 8 #define BLIS_MR_c 4 #define BLIS_MR_z 2 #define BLIS_NR_s 3 #define BLIS_NR_d 3 #define BLIS_NR_c 2 #define BLIS_NR_z 2 //#endif blis-1.1/config/steamroller/make_defs.mk000066400000000000000000000061601474157777200204010ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := steamroller #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/template/000077500000000000000000000000001474157777200154115ustar00rootroot00000000000000blis-1.1/config/template/bli_cntx_init_template.c000066400000000000000000000102171474157777200222760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_template( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_template_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_template_noopt, BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_template_noopt, BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_template_noopt, BLIS_TRSM_L_UKR, BLIS_DCOMPLEX, bli_ztrsm_l_template_noopt, BLIS_TRSM_U_UKR, BLIS_DCOMPLEX, bli_ztrsm_u_template_noopt, // level-1f BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_template_noopt, BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_template_noopt, BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_template_noopt, BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_template_noopt, BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_template_noopt, // level-1v BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_template_noopt, BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_template_noopt, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_TRSM_L_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_TRSM_U_UKR_ROW_PREF, BLIS_DCOMPLEX, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, -1, -1, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, -1, -1, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, -1, -1, 128 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, -1, -1, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, -1, -1, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/template/bli_family_template.h000066400000000000000000000033071474157777200215670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-1.1/config/template/bli_kernel_defs_template.h000066400000000000000000000041521474157777200225660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- // // Only defined for block sizes which are not taken as the default (i.e. when // an optimized kernel is provided). // #define BLIS_MR_z 4 #define BLIS_NR_z 4 // // PACKMR/PACKNR do not need to be defined unless they are different from the // "normal" MR/NR. // //#define BLIS_PACKMR_z 4 //#define BLIS_PACKNR_z 4 //#endif blis-1.1/config/template/kernels/000077500000000000000000000000001474157777200170545ustar00rootroot00000000000000blis-1.1/config/template/kernels/1/000077500000000000000000000000001474157777200172145ustar00rootroot00000000000000blis-1.1/config/template/kernels/1/bli_axpyv_template_noopt_var1.c000066400000000000000000000154101474157777200254210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zaxpyv_template_noopt ( conj_t conjx, dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, cntx_t* cntx ) { /* Template axpyv kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs a vector scale and accumulate (axpy) operation: y := y + alpha * conjx( x ) where x and y are vectors of length n and alpha is a scalar. Parameters: - conjx: Compute with conjugated values of x? - n: The number of elements in vectors x and y. - alpha: The address of a scalar. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Either of the strides incx or incy is non-unit. - Vectors x and y are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; bool use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y; dim_t i; if ( bli_zero_dim1( n ) ) return; if ( bli_zeq0( *alpha ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc2( incx, incy ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zaxpyv_ft f = bli_zaxpyv_template_ref; f ( conjx, n, alpha, x, incx, y, incy, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x and y. xp = x; yp = y; // Iterate over elements of x and y to compute: // y += alpha * conjx( x ); if ( bli_is_noconj( conjx ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpys( *alpha, *xp, *yp ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpys( *alpha, *xp, *yp ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpys( *alpha, *xp, *yp ); xp += 1; yp += 1; } } else // if ( bli_is_conj( conjx ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpyjs( *alpha, *xp, *yp ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpyjs( *alpha, *xp, *yp ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpyjs( *alpha, *xp, *yp ); xp += 1; yp += 1; } } } blis-1.1/config/template/kernels/1/bli_dotv_template_noopt_var1.c000066400000000000000000000172471474157777200252400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zdotv_template_noopt ( conj_t conjx, conj_t conjy, dim_t n, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, dcomplex* restrict rho, cntx_t* cntx ) { /* Template dotv kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs an inner (dot) product operation: rho := conjx( x^T ) * conjy( y ) where x and y are vectors of length n and rho is a scalar. Parameters: - conjx: Compute with conjugated values of x? - conjy: Compute with conjugated values of y? - n: The number of elements in vectors x and y. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. - rho: The address of the output scalar. This template code calls the reference implementation if any of the following conditions are true: - Either of the strides incx or incy is non-unit. - Vectors x and y are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - While four combinations of possible values of conjx and conjy exist, we implement only conjugation on x explicitly; we induce the other two cases by toggling the effective conjugation on x and then conjugating the dot product result. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; dcomplex dotxy; bool use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y; dim_t i; conj_t conjx_use; // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { bli_zset0s( *rho ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc2( incx, incy ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotv_ft f = bli_zdotv_template_ref; f ( conjx, conjy, n, x, incx, y, incy, rho, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x and y. xp = x; yp = y; // Initialize accumulator to zero. bli_zset0s( dotxy ); conjx_use = conjx; // If y must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of x and then conjugating the // resulting dot product. if ( bli_is_conj( conjy ) ) bli_toggle_conj( &conjx_use ); // Iterate over elements of x and y to compute: // rho = conjx( x^T ) * conjy( y ); if ( bli_is_noconj( conjx_use ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdots( *xp, *yp, dotxy ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdots( *xp, *yp, dotxy ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdots( *xp, *yp, dotxy ); xp += 1; yp += 1; } } else // if ( bli_is_conj( conjx_use ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); xp += 1; yp += 1; } } // If conjugation on y was requested, we induce it by conjugating // the contents of dotxy. if ( bli_is_conj( conjy ) ) bli_zconjs( dotxy ); bli_zcopys( dotxy, *rho ); } blis-1.1/config/template/kernels/1f/000077500000000000000000000000001474157777200173625ustar00rootroot00000000000000blis-1.1/config/template/kernels/1f/bli_axpy2v_template_noopt_var1.c000066400000000000000000000240661474157777200256600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zaxpy2v_template_noopt ( conj_t conjx, conj_t conjy, dim_t n, dcomplex* restrict alpha1, dcomplex* restrict alpha2, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, dcomplex* restrict z, inc_t incz, cntx_t* cntx ) { /* Template axpy2v kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel fuses two axpyv operations: z := z + alpha1 * conjx( x ) z := z + alpha2 * conjy( y ) where x, y, and z are vectors of length n and alpha1 and alpha2 are scalars. Parameters: - conjx: Compute with conjugated values of x? - conjy: Compute with conjugated values of y? - n: The number of elements in vectors x, y, and z. - alpha1: The address of the scalar to be applied to x. - alpha2: The address of the scalar to be applied to y. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. - z: The address of vector z. - incz: The vector increment of z. incz should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Any of the strides incx, incy, or incz is non-unit. - Vectors x, y, and z are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Here are a few additional things to consider: - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; dcomplex* zp; bool use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y, off_z; dim_t i; // Return early if possible. if ( bli_zero_dim1( n ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc3( incx, incy, incz ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( z, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); off_z = bli_offset_from_alignment( z, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y && off_x == off_z ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zaxpy2v_ft f = bli_zaxpy2v_template_ref; f ( conjx, conjy, n, alpha1, alpha2, x, incx, y, incy, z, incz, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x, y, and z. xp = x; yp = y; zp = z; // Iterate over rows of x, y, and z to compute: // z += alpha1 * conjx( x ) + alpha2 * conjy( y ); if ( bli_is_noconj( conjx ) && bli_is_noconj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_noconj( conjx ) && bli_is_conj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_conj( conjx ) && bli_is_noconj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } else // if ( bli_is_conj( conjx ) && bli_is_conj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } } blis-1.1/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c000066400000000000000000000223431474157777200255520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zaxpyf_template_noopt ( conj_t conja, conj_t conjx, dim_t m, dim_t b_n, dcomplex* restrict alpha, dcomplex* restrict a, inc_t inca, inc_t lda, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, cntx_t* cntx ) { /* Template axpyf kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs the following gemv-like operation: y := y + alpha * conja( A ) * conjx( x ) where A is an m x b_n matrix, x is a vector of length b_n, y is a vector of length m, and alpha is a scalar. The operation is performed as a series of fused axpyv operations, and therefore A should be column-stored. Parameters: - conja: Compute with conjugated values of A? - conjx: Compute with conjugated values of x? - m: The number of rows in matrix A. - b_n: The number of columns in matrix A. Must be equal to or less than the fusing factor. - alpha: The address of a scalar. - a: The address of matrix A. - inca: The row stride of A. inca should be unit unless the implementation makes special accomodation for non-unit values. - lda: The column stride of A. - x: The address of vector x. - incx: The vector increment of x. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Either of the strides inca or incy is non-unit. - The address of A, the second column of A, and y are unaligned with different offsets. If the first/second columns of A and address of y are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - When optimizing, you should fully unroll the loops over b_n. This is the dimension across which we are fusing axpyv operations. - This template code chooses to call the reference implementation whenever b_n is less than the fusing factor, so as to avoid having to handle edge cases. One may choose to optimize this edge case, if desired. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *a ); dcomplex* ap[ bli_zaxpyf_fusefac ]; dcomplex* xp[ bli_zaxpyf_fusefac ]; dcomplex* yp; dcomplex alpha_x[ bli_zaxpyf_fusefac ]; bool use_ref = FALSE; dim_t m_pre = 0; dim_t m_iter; dim_t m_left; dim_t off_a, off_a2, off_y; dim_t i, j; // Return early if possible. if ( bli_zero_dim2( m, b_n ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_zaxpyf_fusefac ) { use_ref = TRUE; } else if ( bli_has_nonunit_inc3( inca, incx, incy ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( a, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( a+lda, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_a = bli_offset_from_alignment( a, BLIS_SIMD_ALIGN_SIZE ); off_a2 = bli_offset_from_alignment( a+lda, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); if ( off_a == off_y && off_a == off_a2 ) { use_ref = FALSE; m_pre = off_a / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zaxpyf_ft f = bli_zaxpyf_template_ref; f ( conja, conjx, m, b_n, alpha, a, inca, lda, x, incx, y, incy, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. m_iter = ( m - m_pre ) / n_elem_per_iter; m_left = ( m - m_pre ) % n_elem_per_iter; // Initialize pointers into the columns of A and elements of x. for ( j = 0; j < b_n; ++j ) { ap[ j ] = a + (j )*lda; xp[ j ] = x + (j )*incx; } yp = y; // Load elements of x or conj(x) into alpha_x and scale by alpha. if ( bli_is_noconj( conjx ) ) { for ( j = 0; j < b_n; ++j ) { bli_zcopys( *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } else // if ( bli_is_conj( conjx ) ) { for ( j = 0; j < b_n; ++j ) { bli_zcopyjs( *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } // Iterate over rows of A and y to compute: // y += conja( A )*conjx( x ); if ( bli_is_noconj( conja ) ) { // Compute front edge cases if a and y were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } yp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, and the b_n loop should be fully unrolled. The addresses in // ap[] and yp are guaranteed to be aligned to // BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += n_elem_per_iter; } yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } yp += 1; } } else // if ( bli_is_conj( conja ) ) { // Compute front edge cases if a and y were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } yp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, and the b_n loop should be fully unrolled. The addresses in // ap[] and yp are guaranteed to be aligned to // BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += n_elem_per_iter; } yp += n_elem_per_iter; } // Compute tail edge cases. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp ); ap[ j ] += 1; } yp += 1; } } } blis-1.1/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c000066400000000000000000000260401474157777200262770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zdotaxpyv_template_noopt ( conj_t conjxt, conj_t conjx, conj_t conjy, dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, dcomplex* restrict rho, dcomplex* restrict z, inc_t incz, cntx_t* cntx ) { /* Template dotaxpyv kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel fuses a dotv and axpyv operation: rho := conjxt( x^T ) * conjy( y ) z := z + alpha * conjx( x ) where x, y, and z are vectors of length n and alpha1 and alpha2 are scalars. Parameters: - conjxt: Compute with conjugated values of x^T? - conjx: Compute with conjugated values of x? - conjy: Compute with conjugated values of y? - n: The number of elements in vectors x, y, and z. - alpha: The address of the scalar to be applied to x. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. - rho: The address of the output scalar of the dotv subproblem. - z: The address of vector z. - incz: The vector increment of z. incz should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Any of the strides incx, incy, or incz is non-unit. - Vectors x, y, and z are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Here are a few additional things to consider: - While four combinations of possible values of conjx and conjy exist, we implement only conjugation on x explicitly; we induce the other two cases by toggling the effective conjugation on x and then conjugating the dot product result. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; dcomplex* zp; dcomplex dotxy; bool use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y, off_z; dim_t i; conj_t conjxt_use; // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { bli_zset0s( *rho ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc3( incx, incy, incz ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( z, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If x, y, and z are unaligned by the same offset, then we can // still use an implementation that depends on alignment for most // of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); off_z = bli_offset_from_alignment( z, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y && off_x == off_z ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotaxpyv_ft f = bli_zdotaxpyv_template_ref; f ( conjxt, conjx, conjy, n, alpha, x, incx, y, incy, rho, z, incz, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x, y, and z. xp = x; yp = y; zp = z; // Initialize accumulator to zero. bli_zset0s( dotxy ); conjxt_use = conjxt; // If y must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of xt and then conjugating the // resulting dot product. if ( bli_is_conj( conjy ) ) bli_toggle_conj( &conjxt_use ); // Iterate over elements of x, y, and z to compute: // r = conjxt( x^T ) * conjy( y ); // z += alpha * conjx( x ); if ( bli_is_noconj( conjx ) && bli_is_noconj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_noconj( conjx ) && bli_is_conj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_conj( conjx ) && bli_is_noconj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } else // if ( bli_is_conj( conjx ) && bli_is_conj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } // If conjugation on y was requested, we induce it by conjugating // the contents of rho. if ( bli_is_conj( conjy ) ) bli_zconjs( dotxy ); bli_zcopys( dotxy, *rho ); } blis-1.1/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c000066400000000000000000000340001474157777200264420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zdotxaxpyf_template_noopt ( conj_t conjat, conj_t conja, conj_t conjw, conj_t conjx, dim_t m, dim_t b_n, dcomplex* restrict alpha, dcomplex* restrict a, inc_t inca, inc_t lda, dcomplex* restrict w, inc_t incw, dcomplex* restrict x, inc_t incx, dcomplex* restrict beta, dcomplex* restrict y, inc_t incy, dcomplex* restrict z, inc_t incz, cntx_t* cntx ) { /* Template dotxaxpyf kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs the following two gemv-like operations: y := beta * y + alpha * conjat( A^T ) * conjw( w ) z := z + alpha * conja( A ) * conjx( x ) where A is an m x b_n matrix, x and y are vector of length b_n, w and z are vectors of length m, and alpha and beta are scalars. The operation fuses a dotxf and an axpyf operation, and therefore A should be column- stored. Parameters: - conjat: Compute with conjugated values of A^T? - conja: Compute with conjugated values of A? - conjw: Compute with conjugated values of w? - conjx: Compute with conjugated values of x? - m: The number of rows in matrix A. - b_n: The number of columns in matrix A. Must be equal to or less than the fusing factor. - alpha: The address of the scalar to be applied to A^T*w and A*x. - a: The address of matrix A. - inca: The row stride of A. inca should be unit unless the implementation makes special accomodation for non-unit values. - lda: The column stride of A. - w: The address of vector w. - incw: The vector increment of w. incw should be unit unless the implementation makes special accomodation for non-unit values. - x: The address of vector x. - incx: The vector increment of x. - beta: The address of the scalar to be applied to y. - y: The address of vector y. - incy: The vector increment of y. - z: The address of vector z. - incz: The vector increment of z. incz should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Any of the strides inca, incw, or incz is non-unit. - The address of A, the second column of A, w, and z are unaligned with different offsets. If the first/second rows of A and addresses of w and z are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - When optimizing, you should fully unroll the loops over b_n. This is the dimension across which we are fusing dotxv operations. - This template code chooses to call the reference implementation whenever b_n is less than the fusing factor, so as to avoid having to handle edge cases. One may choose to optimize this edge case, if desired. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *a ); dcomplex* ap[ bli_zdotxaxpyf_fusefac ]; dcomplex* xp[ bli_zdotxaxpyf_fusefac ]; dcomplex* yp[ bli_zdotxaxpyf_fusefac ]; dcomplex* wp; dcomplex* zp; dcomplex At_w[ bli_zdotxaxpyf_fusefac ]; dcomplex alpha_x[ bli_zdotxaxpyf_fusefac ]; bool use_ref = FALSE; dim_t m_pre = 0; dim_t m_iter; dim_t m_left; dim_t off_a, off_a2, off_w, off_z; dim_t i, j; conj_t conjat_use; // Return early if possible. if ( bli_zero_dim2( m, b_n ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_zdotxaxpyf_fusefac ) { use_ref = TRUE; } else if ( bli_has_nonunit_inc3( inca, incw, incz ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( a, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( a+lda, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( w, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( z, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, w, and z are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_a = bli_offset_from_alignment( a, BLIS_SIMD_ALIGN_SIZE ); off_a2 = bli_offset_from_alignment( a+lda, BLIS_SIMD_ALIGN_SIZE ); off_w = bli_offset_from_alignment( w, BLIS_SIMD_ALIGN_SIZE ); off_z = bli_offset_from_alignment( z, BLIS_SIMD_ALIGN_SIZE ); if ( off_a == off_a2 && off_a == off_w && off_a == off_z ) { use_ref = FALSE; m_pre = off_a / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotxaxpyf_ft f = bli_zdotxaxpyf_template_ref; f ( conjat, conja, conjw, conjx, m, b_n, alpha, a, inca, lda, w, incw, x, incx, beta, y, incy, z, incz, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. m_iter = ( m - m_pre ) / n_elem_per_iter; m_left = ( m - m_pre ) % n_elem_per_iter; // Initialize pointers into the columns of A and elements of x. for ( j = 0; j < b_n; ++j ) { ap[ j ] = a + (j )*lda; xp[ j ] = x + (j )*incx; yp[ j ] = y + (j )*incy; } wp = w; zp = z; // Load elements of x or conj(x) into alpha_x and scale by alpha. if ( bli_is_noconj( conjx ) ) { for ( j = 0; j < b_n; ++j ) { bli_zcopys( *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } else // if ( bli_is_conj( conjx ) ) { for ( j = 0; j < b_n; ++j ) { bli_zcopyjs( *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } // Initialize our accumulators to zero. for ( j = 0; j < b_n; ++j ) { bli_zset0s( At_w[ j ] ); } conjat_use = conjat; // If w must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of At and then conjugating the // resulting dot products. if ( bli_is_conj( conjw ) ) bli_toggle_conj( &conjat_use ); // Iterate over the columns of A and elements of w and z to compute: // y = beta * y + alpha * conjat( A^T ) * conjw( w ); // z = z + alpha * conja( A ) * conjx( x ); // where A is m x b_n. if ( bli_is_noconj( conja ) && bli_is_noconj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } else if ( bli_is_noconj( conja ) && bli_is_conj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } else if ( bli_is_conj( conja ) && bli_is_noconj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } else if ( bli_is_conj( conja ) && bli_is_conj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } // If conjugation on w was requested, we induce it by conjugating // the contents of At_w. if ( bli_is_conj( conjw ) ) { for ( j = 0; j < b_n; ++j ) { bli_zconjs( At_w[ j ] ); } } // Scale the At_w product by alpha and accumulate into y after // scaling by beta. for ( j = 0; j < b_n; ++j ) { bli_zscals( *beta, *yp[ j ] ); bli_zaxpys( *alpha, At_w[ j ], *yp[ j ] ); } } blis-1.1/config/template/kernels/1f/bli_dotxf_template_noopt_var1.c000066400000000000000000000236241474157777200255520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zdotxf_template_noopt ( conj_t conjat, conj_t conjx, dim_t m, dim_t b_n, dcomplex* restrict alpha, dcomplex* restrict a, inc_t inca, inc_t lda, dcomplex* restrict x, inc_t incx, dcomplex* restrict beta, dcomplex* restrict y, inc_t incy, cntx_t* cntx ) { /* Template dotxf kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs the following gemv-like operation: y := beta * y + alpha * conjat( A^T ) * conjx( x ) where A is an m x b_n matrix, x is a vector of length m, y is a vector of length b_n, and alpha and beta are scalars. The operation is performed as a series of fused dotxv operations, and therefore A should be column- stored. Parameters: - conjat: Compute with conjugated values of A^T? - conjx: Compute with conjugated values of x? - m: The number of rows in matrix A. - b_n: The number of columns in matrix A. Must be equal to or less than the fusing factor. - alpha: The address of the scalar to be applied to A*x. - a: The address of matrix A. - inca: The row stride of A. inca should be unit unless the implementation makes special accomodation for non-unit values. - lda: The column stride of A. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - beta: The address of the scalar to be applied to y. - y: The address of vector y. - incy: The vector increment of y. This template code calls the reference implementation if any of the following conditions are true: - Either of the strides inca or incx is non-unit. - The address of A, the second column of A, and x are unaligned with different offsets. If the first/second columns of A and address of x are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - When optimizing, you should fully unroll the loops over b_n. This is the dimension across which we are fusing dotxv operations. - This template code chooses to call the reference implementation whenever b_n is less than the fusing factor, so as to avoid having to handle edge cases. One may choose to optimize this edge case, if desired. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* ap[ bli_zdotxf_fusefac ]; dcomplex* xp; dcomplex* yp[ bli_zdotxf_fusefac ]; dcomplex Atx[ bli_zdotxf_fusefac ]; bool use_ref = FALSE; dim_t m_pre = 0; dim_t m_iter; dim_t m_left; dim_t off_a, off_a2, off_x; dim_t i, j; conj_t conjat_use; // Return early if possible. if ( bli_zero_dim1( b_n ) ) return; // If the vector lengths are zero, scale r by beta and return. if ( bli_zero_dim1( m ) ) { bli_zscalv_ex ( BLIS_NO_CONJUGATE, b_n, beta, y, incy, cntx ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_zdotxf_fusefac ) { use_ref = TRUE; } else if ( bli_has_nonunit_inc2( inca, incx ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( a, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( a+lda, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and x are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_a = bli_offset_from_alignment( a, BLIS_SIMD_ALIGN_SIZE ); off_a2 = bli_offset_from_alignment( a+lda, BLIS_SIMD_ALIGN_SIZE ); off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); if ( off_a == off_a2 && off_a == off_x ) { use_ref = FALSE; m_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotxf_ft f = bli_zdotxf_template_ref; f ( conjat, conjx, m, b_n, alpha, a, inca, lda, x, incx, beta, y, incy, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. m_iter = ( m - m_pre ) / n_elem_per_iter; m_left = ( m - m_pre ) % n_elem_per_iter; // Initialize pointers into the rows of A and elements of y. for ( i = 0; i < b_n; ++i ) { ap[ i ] = a + (i )*lda; yp[ i ] = y + (i )*incy; } xp = x; // Initialize our accumulators to zero. for ( i = 0; i < b_n; ++i ) { bli_zset0s( Atx[ i ] ); } conjat_use = conjat; // If x must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of A and then conjugating the // resulting product A^T*x. if ( bli_is_conj( conjx ) ) bli_toggle_conj( &conjat_use ); // Iterate over columns of A and rows of x to compute: // Atx = conjat_use( A^T ) * x; if ( bli_is_noconj( conjat_use ) ) { // Compute front edge cases if A and y were unaligned. for ( j = 0; j < m_pre; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of Atx should be kept in registers, and the b_n loop // should be fully unrolled. The addresses in ap[] and xp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( j = 0; j < m_iter; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += n_elem_per_iter; } xp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( j = 0; j < m_left; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } } else // if ( bli_is_conj( conjat_use ) ) { // Compute front edge cases if A and y were unaligned. for ( j = 0; j < m_pre; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of Atx should be kept in registers, and the b_n loop // should be fully unrolled. The addresses in ap[] and xp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( j = 0; j < m_iter; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += n_elem_per_iter; } xp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( j = 0; j < m_left; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } } // If conjugation on y was requested, we induce it by conjugating // the contents of Atx. if ( bli_is_conj( conjx ) ) { for ( i = 0; i < b_n; ++i ) { bli_zconjs( Atx[ i ] ); } } // Scale the Atx product by alpha and accumulate into y after // scaling by beta. for ( i = 0; i < b_n; ++i ) { bli_zzscals( *beta, *yp[ i ] ); bli_zzzaxpys( *alpha, Atx[ i ], *yp[ i ] ); } } blis-1.1/config/template/kernels/3/000077500000000000000000000000001474157777200172165ustar00rootroot00000000000000blis-1.1/config/template/kernels/3/bli_gemm_template_noopt_mxn.c000066400000000000000000000110661474157777200251350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zgemm_template_noopt ( dim_t m, dim_t n, dim_t k, dcomplex* restrict alpha, dcomplex* restrict a1, dcomplex* restrict b1, dcomplex* restrict beta, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { /* Template gemm micro-kernel implementation This function contains a template implementation for a double-precision complex micro-kernel, coded in C, which can serve as the starting point for one to write an optimized micro-kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be nearly identical.) This micro-kernel performs a matrix-matrix multiplication of the form: C11 := beta * C11 + alpha * A1 * B1 where A1 is MR x k, B1 is k x NR, C11 is MR x NR, and alpha and beta are scalars. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. -FGVZ */ const num_t dt = BLIS_DCOMPLEX; const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const inc_t cs_a = packmr; const inc_t rs_b = packnr; const inc_t rs_ab = 1; const inc_t cs_ab = mr; dim_t l, j, i; dcomplex ab[ mr * nr ]; dcomplex* abij; dcomplex ai, bj; /* Initialize the accumulator elements in ab to zero. */ for ( i = 0; i < mr * nr; ++i ) { bli_zset0s( *(ab + i) ); } /* Perform a series of k rank-1 updates into ab. */ for ( l = 0; l < k; ++l ) { abij = ab; /* In an optimized implementation, these two loops over MR and NR are typically fully unrolled. */ for ( j = 0; j < nr; ++j ) { bj = *(b1 + j); for ( i = 0; i < mr; ++i ) { ai = *(a1 + i); bli_zdots( ai, bj, *abij ); abij += rs_ab; } } a1 += cs_a; b1 += rs_b; } /* Scale each element of ab by alpha. */ for ( i = 0; i < mr * nr; ++i ) { bli_zscals( *alpha, *(ab + i) ); } /* If beta is zero, overwrite c11 with the scaled result in ab. Otherwise, scale c11 by beta and then add the scaled result in ab. */ if ( bli_zeq0( *beta ) ) { /* c11 := ab */ bli_zcopys_mxn( m, n, ab, rs_ab, cs_ab, c11, rs_c, cs_c ); } else { /* c11 := beta * c11 + ab */ bli_zxpbys_mxn( m, n, ab, rs_ab, cs_ab, beta, c11, rs_c, cs_c ); } } blis-1.1/config/template/kernels/3/bli_gemmtrsm_l_template_noopt_mxn.c000066400000000000000000000065761474157777200263700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zgemmtrsm_l_template_noopt ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a10, dcomplex* restrict a11, dcomplex* restrict b01, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { /* Template gemmtrsm_l micro-kernel implementation This function contains a template implementation for a double-precision complex micro-kernel that fuses a gemm with a trsm_l subproblem. This micro-kernel performs the following compound operation: B11 := alpha * B11 - A10 * B01 (gemm) B11 := inv(A11) * B11 (trsm) C11 := B11 where A11 is MR x MR and lower triangular, A10 is MR x k, B01 is k x NR, B11 is MR x NR, and alpha is a scalar. Here, inv() denotes matrix inverse. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. -FGVZ */ const num_t dt = BLIS_DCOMPLEX; const inc_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const inc_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const inc_t rs_b = packnr; const inc_t cs_b = 1; dcomplex* restrict minus_one = bli_zm1; /* b11 = alpha * b11 - a10 * b01; */ bli_zgemm_template_noopt ( mr, nr, k, minus_one, a10, b01, alpha, b11, rs_b, cs_b, data ); /* b11 = inv(a11) * b11; c11 = b11; */ bli_ztrsm_l_template_noopt ( a11, b11, c11, rs_c, cs_c, data ); } blis-1.1/config/template/kernels/3/bli_gemmtrsm_u_template_noopt_mxn.c000066400000000000000000000065751474157777200264000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zgemmtrsm_u_template_noopt ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a10, dcomplex* restrict a11, dcomplex* restrict b01, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { /* Template gemmtrsm_u micro-kernel implementation This function contains a template implementation for a double-precision complex micro-kernel that fuses a gemm with a trsm_u subproblem. This micro-kernel performs the following compound operation: B11 := alpha * B11 - A12 * B21 (gemm) B11 := inv(A11) * B11 (trsm) C11 := B11 where A11 is MR x MR and upper triangular, A12 is MR x k, B21 is k x NR, B11 is MR x NR, and alpha is a scalar. Here, inv() denotes matrix inverse. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. -FGVZ */ const num_t dt = BLIS_DCOMPLEX; const inc_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const inc_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const inc_t rs_b = packnr; const inc_t cs_b = 1; dcomplex* restrict minus_one = bli_zm1; /* b11 = alpha * b11 - a12 * b21; */ bli_zgemm_template_noopt ( mr, nr, k, minus_one, a10, b01, alpha, b11, rs_b, cs_b, data ); /* b11 = inv(a11) * b11; c11 = b11; */ bli_ztrsm_u_template_noopt ( a11, b11, c11, rs_c, cs_c, data ); } blis-1.1/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c000066400000000000000000000111321474157777200255020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_ztrsm_l_template_noopt ( dcomplex* restrict a11, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { /* Template trsm_l micro-kernel implementation This function contains a template implementation for a double-precision complex trsm micro-kernel, coded in C, which can serve as the starting point for one to write an optimized micro-kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be nearly identical.) This micro-kernel performs the following operation: C11 := inv(A11) * B11 where A11 is MR x MR and lower triangular, B11 is MR x NR, and C11 is MR x NR. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. -FGVZ */ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const dim_t m = mr; const dim_t n = nr; const inc_t rs_a = 1; const inc_t cs_a = packmr; const inc_t rs_b = packnr; const inc_t cs_b = 1; dim_t iter, i, j, l; dim_t n_behind; dcomplex* restrict alpha11; dcomplex* restrict a10t; dcomplex* restrict alpha10; dcomplex* restrict X0; dcomplex* restrict x1; dcomplex* restrict x01; dcomplex* restrict chi01; dcomplex* restrict chi11; dcomplex* restrict gamma11; dcomplex rho11; for ( iter = 0; iter < m; ++iter ) { i = iter; n_behind = i; alpha11 = a11 + (i )*rs_a + (i )*cs_a; a10t = a11 + (i )*rs_a + (0 )*cs_a; X0 = b11 + (0 )*rs_b + (0 )*cs_b; x1 = b11 + (i )*rs_b + (0 )*cs_b; /* x1 = x1 - a10t * X0; */ /* x1 = x1 / alpha11; */ for ( j = 0; j < n; ++j ) { x01 = X0 + (0 )*rs_b + (j )*cs_b; chi11 = x1 + (0 )*rs_b + (j )*cs_b; gamma11 = c11 + (i )*rs_c + (j )*cs_c; /* chi11 = chi11 - a10t * x01; */ bli_zset0s( rho11 ); for ( l = 0; l < n_behind; ++l ) { alpha10 = a10t + (l )*cs_a; chi01 = x01 + (l )*rs_b; bli_zaxpys( *alpha10, *chi01, rho11 ); } bli_zsubs( rho11, *chi11 ); /* chi11 = chi11 / alpha11; */ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ bli_zscals( *alpha11, *chi11 ); /* Output final result to matrix C. */ bli_zcopys( *chi11, *gamma11 ); } } } blis-1.1/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c000066400000000000000000000111451474157777200255170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_ztrsm_u_template_noopt ( dcomplex* restrict a11, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { /* Template trsm_u micro-kernel implementation This function contains a template implementation for a double-precision complex trsm micro-kernel, coded in C, which can serve as the starting point for one to write an optimized micro-kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be nearly identical.) This micro-kernel performs the following operation: C11 := inv(A11) * B11 where A11 is MR x MR and upper triangular, B11 is MR x NR, and C11 is MR x NR. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. -FGVZ */ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const dim_t m = mr; const dim_t n = nr; const inc_t rs_a = 1; const inc_t cs_a = packmr; const inc_t rs_b = packnr; const inc_t cs_b = 1; dim_t iter, i, j, l; dim_t n_behind; dcomplex* restrict alpha11; dcomplex* restrict a12t; dcomplex* restrict alpha12; dcomplex* restrict X2; dcomplex* restrict x1; dcomplex* restrict x21; dcomplex* restrict chi21; dcomplex* restrict chi11; dcomplex* restrict gamma11; dcomplex rho11; for ( iter = 0; iter < m; ++iter ) { i = m - iter - 1; n_behind = iter; alpha11 = a11 + (i )*rs_a + (i )*cs_a; a12t = a11 + (i )*rs_a + (i+1)*cs_a; x1 = b11 + (i )*rs_b + (0 )*cs_b; X2 = b11 + (i+1)*rs_b + (0 )*cs_b; /* x1 = x1 - a12t * X2; */ /* x1 = x1 / alpha11; */ for ( j = 0; j < n; ++j ) { chi11 = x1 + (0 )*rs_b + (j )*cs_b; x21 = X2 + (0 )*rs_b + (j )*cs_b; gamma11 = c11 + (i )*rs_c + (j )*cs_c; /* chi11 = chi11 - a12t * x21; */ bli_zset0s( rho11 ); for ( l = 0; l < n_behind; ++l ) { alpha12 = a12t + (l )*cs_a; chi21 = x21 + (l )*rs_b; bli_zaxpys( *alpha12, *chi21, rho11 ); } bli_zsubs( rho11, *chi11 ); /* chi11 = chi11 / alpha11; */ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ bli_zscals( *alpha11, *chi11 ); /* Output final result to matrix C. */ bli_zcopys( *chi11, *gamma11 ); } } } blis-1.1/config/template/make_defs.mk000066400000000000000000000051321474157777200176610ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := template #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 CKVECFLAGS := # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) CRVECFLAGS := $(CKVECFLAGS) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/thunderx2/000077500000000000000000000000001474157777200155215ustar00rootroot00000000000000blis-1.1/config/thunderx2/bli_cntx_init_thunderx2.c000066400000000000000000000063621474157777200225240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_thunderx2( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_thunderx2_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // level-3 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // level-3 BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, FALSE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, FALSE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, BLIS_VA_END ); } blis-1.1/config/thunderx2/bli_family_thunderx2.h000066400000000000000000000034701474157777200220100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 blis-1.1/config/thunderx2/bli_kernel_defs_thunderx2.h000066400000000000000000000035731474157777200230140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 8 #define BLIS_MR_d 6 #define BLIS_NR_s 12 #define BLIS_NR_d 8 //#endif blis-1.1/config/thunderx2/make_defs.mk000066400000000000000000000060511474157777200177720ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := thunderx2 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -mcpu=thunderx2t99 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=thunderx2t99 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mcpu=thunderx2t99 else $(error gcc or clang is required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/x86_64/000077500000000000000000000000001474157777200145345ustar00rootroot00000000000000blis-1.1/config/x86_64/bli_family_x86_64.h000066400000000000000000000033061474157777200200340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-1.1/config/x86_64/make_defs.mk000066400000000000000000000061301474157777200170030ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := x86_64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xSSE3 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/zen/000077500000000000000000000000001474157777200143725ustar00rootroot00000000000000blis-1.1/config/zen/amd_config.mk000066400000000000000000000057161474157777200170220ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # All the common flags for AMD architectures will be added here # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -fomit-frame-pointer endif # Flags specific to optimized kernels. # NOTE: The -fomit-frame-pointer option is needed for some kernels because # they make explicit use of the rbp register. CKOPTFLAGS := $(COPTFLAGS) -O3 ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx2 -mfpmath=sse -mfma else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx2 -mfpmath=sse -mfma ifeq ($(strip $(shell clang -v |& head -1 | grep -c 'AOCC.LLVM')),1) CKVECFLAGS += -mllvm -disable-licm-vrp endif else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif blis-1.1/config/zen/bli_cntx_init_zen.c000066400000000000000000000325561474157777200202520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2020-2022, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" //GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) void bli_cntx_init_zen( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_zen_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, // gemmsup BLIS_GEMMSUP_RRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_CRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, #if 0 BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, #endif #if 0 // NOTE: This set of kernels is likely broken and therefore disabled. BLIS_GEMMSUP_RRR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_CRR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_RCR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_CRR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_RCC_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, BLIS_GEMMSUP_CCR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, BLIS_GEMMSUP_CCC_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, #endif // packm BLIS_PACKM_MRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_16xk, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_4xk, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, // copyv BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_zen_int, BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_zen_int, // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, // setv BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_zen_int, BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_zen_int, // swapv BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_zen_int8, BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_zen_int8, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // gemm BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmsup BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, #if 0 // NOTE: This set of kernels is likely broken and therefore disabled. BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, #endif BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); /* Multi Instance performance improvement of DGEMM when binded to a CCX In Multi instance each thread runs a sequential DGEMM. a) If BLIS is run in a multi-instance mode with CPU freq 2.6/2.2 Ghz DDR4 clock frequency 2400Mhz mc = 240, kc = 512, and nc = 2040 has better performance on EPYC server, over the default block sizes. b) If BLIS is run in Single Instance mode mc = 510, kc = 1024 and nc = 4080 */ #ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES // Zen optmized level 3 cache block sizes #if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1020, 510, 510, 255 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1024, 1024, 1024, 1024 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 2040, 1528 ); #endif #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MT ], 512, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NT ], 512, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KT ], 440, 220, -1, -1 ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR_SUP ], 6, 6, -1, -1, 9, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR_SUP ], 16, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC_SUP ], 144, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC_SUP ], 256, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC_SUP ], 8160, 4080, -1, -1 ); #if 0 bli_blksz_init ( &blkszs[ BLIS_MR_SUP ], 6, 6, 3, 3, 9, 9, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR_SUP ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC_SUP ], 144, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_KC_SUP ], 512, 256, 128, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_NC_SUP ], 8160, 4080, 2040, 1020 ); #endif // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, // sup thresholds BLIS_MT, &blkszs[ BLIS_MT ], BLIS_MT, BLIS_NT, &blkszs[ BLIS_NT ], BLIS_NT, BLIS_KT, &blkszs[ BLIS_KT ], BLIS_KT, // gemmsup BLIS_NC_SUP, &blkszs[ BLIS_NC_SUP ], BLIS_NR_SUP, BLIS_KC_SUP, &blkszs[ BLIS_KC_SUP ], BLIS_KR_SUP, BLIS_MC_SUP, &blkszs[ BLIS_MC_SUP ], BLIS_MR_SUP, BLIS_NR_SUP, &blkszs[ BLIS_NR_SUP ], BLIS_NR_SUP, BLIS_MR_SUP, &blkszs[ BLIS_MR_SUP ], BLIS_MR_SUP, BLIS_VA_END ); // ------------------------------------------------------------------------- #if 0 // Initialize the context with the sup handlers. bli_cntx_set_l3_sup_handlers ( cntx, BLIS_GEMM, bli_gemmsup_ref, //BLIS_GEMMT, bli_gemmtsup_ref, BLIS_VA_END ); #endif } blis-1.1/config/zen/bli_family_zen.h000066400000000000000000000064001474157777200175260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 #define BLIS_ENABLE_ZEN_BLOCK_SIZES // Vanilla BLIS disables AMD's small matrix handling by default. #if 0 #define BLIS_ENABLE_SMALL_MATRIX #define BLIS_ENABLE_SMALL_MATRIX_TRSM // This will select the threshold below which small matrix code will be called. #define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 #define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n) #define BLIS_SMALL_MATRIX_A_THRES_TRSM 128 #define BLIS_SMALL_MATRIX_A_THRES_M_GEMMT 96 #define BLIS_SMALL_MATRIX_A_THRES_N_GEMMT 128 //This macro will enable BLIS DGEMM to choose block sizes for a single instance mode #define BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES 0 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES 250 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES 90 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22 #endif #if 0 // Allow the sup implementation to combine some small edge case iterations in // the 2nd loop of the panel-block algorithm (MR) and/or the 2nd loop of the // block-panel algorithm (NR) with the last full iteration that precedes it. // NOTE: These cpp macros need to be explicitly set to an integer since they // are used at compile-time to create unconditional branches or dead code // regions. #define BLIS_ENABLE_SUP_MR_EXT 1 #define BLIS_ENABLE_SUP_NR_EXT 0 #endif blis-1.1/config/zen/bli_kernel_defs_zen.h000066400000000000000000000037231474157777200205330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 6 #define BLIS_MR_d 6 #define BLIS_MR_c 3 #define BLIS_MR_z 3 #define BLIS_NR_s 16 #define BLIS_NR_d 8 #define BLIS_NR_c 8 #define BLIS_NR_z 4 //#endif blis-1.1/config/zen/make_defs.mk000066400000000000000000000065471474157777200166550ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -fomit-frame-pointer endif # Flags specific to optimized and reference kernels. # NOTE: The -fomit-frame-pointer option is needed for some kernels because # they make explicit use of the rbp register. CKOPTFLAGS := $(COPTFLAGS) -O3 CROPTFLAGS := $(CKOPTFLAGS) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast ifeq ($(CC_VENDOR),gcc) ifeq ($(GCC_OT_6_1_0),yes) # gcc versions older than 6.1. CVECFLAGS_VER := -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store endif else ifeq ($(CC_VENDOR),clang) CVECFLAGS_VER := -march=znver1 else ifeq ($(CC_VENDOR),aocc) CVECFLAGS_VER := -march=znver1 -mllvm -disable-licm-vrp else $(error gcc, clang, or aocc is required for this configuration.) endif endif endif CKVECFLAGS += $(CVECFLAGS_VER) CRVECFLAGS += $(CVECFLAGS_VER) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/zen/make_defs.mk.old000066400000000000000000000066211474157777200174230ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # FLAGS that are specific to the 'zen' architecture are added here. # FLAGS that are common for all the AMD architectures are present in # amd_config.mk. # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # Include the file containing common flags for all AMD architectures. AMD_CONFIG_FILE := amd_config.mk AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen -include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE) ifeq ($(CC_VENDOR),gcc) # If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the # Bulldozer instruction sets that were omitted from Zen. # Additionally, if gcc is 4.9 (clang 3.5?) or newer, we may want to add # Zen-specific instructions back into the mix: # -mclzero -madx -mrdseed -mmwaitx -msha -mxsavec -mxsaves -mclflushopt -mpopcnt ifeq ($(GCC_OT_6_1_0),yes) CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else # If gcc is at least 6.1.0, then we can specify the microarchitecture using # the preferred option. CRVECFLAGS += -march=znver1 CKVECFLAGS += -march=znver1 endif else ifeq ($(CC_VENDOR),clang) # I couldn't find which versions of clang added support for -march=znver1, # so we don't even bother attempting the differentiation that appears in the # gcc branch above. CRVECFLAGS += -march=znver1 CKVECFLAGS += -march=znver1 else $(error gcc or clang are required for this configuration.) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/zen/old/000077500000000000000000000000001474157777200151505ustar00rootroot00000000000000blis-1.1/config/zen/old/bli_kernel.h000066400000000000000000000165661474157777200174450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // // threading related // By default it is effective to paralleize the // outerloops. Setting these macros to 1 will force // JR and NR inner loops to be not paralleized. #define BLIS_DEFAULT_MR_THREAD_MAX 1 #define BLIS_DEFAULT_NR_THREAD_MAX 1 // sgemm micro-kernel #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_24x4 #define BLIS_DEFAULT_MC_S 264 #define BLIS_DEFAULT_KC_S 128 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 24 #define BLIS_DEFAULT_NR_S 4 #endif #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 6 #endif #if 1 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 6 #define BLIS_DEFAULT_NR_S 16 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // dgemm micro-kernel #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_12x4 #define BLIS_DEFAULT_MC_D 96 #define BLIS_DEFAULT_KC_D 192 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 12 #define BLIS_DEFAULT_NR_D 4 #endif #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 6 #endif #if 1 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8 #define BLIS_DEFAULT_MC_D 510 // 72 /* Improves performance for large Matrices */ #define BLIS_DEFAULT_KC_D 1024 // 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // cgemm micro-kernel #if 1 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 3 #define BLIS_DEFAULT_NR_C 8 #define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // zgemm micro-kernel #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // zgemm micro-kernel #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // -- trsm-related -- #define BLIS_STRSM_L_UKERNEL bli_strsm_l_int_6x16 #define BLIS_DTRSM_L_UKERNEL bli_dtrsm_l_int_6x8 // --gemmtrsm-related -- #define BLIS_SGEMMTRSM_L_UKERNEL bli_sgemmtrsm_l_6x16 #define BLIS_DGEMMTRSM_L_UKERNEL bli_dgemmtrsm_l_6x8 #define BLIS_SMALL_MATRIX_ENABLE //This will select the threshold below which small matrix code will be called. #define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 gint_t bli_gemm_small_matrix ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- #define BLIS_DEFAULT_1F_S 8 #define BLIS_DEFAULT_1F_D 4 // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- #define BLIS_SAXPYF_KERNEL bli_saxpyf_int_var1 #define BLIS_DAXPYF_KERNEL bli_daxpyf_int_var1 // -- dotxf -- #define BLIS_SDOTXF_KERNEL bli_sdotxf_int_var1 #define BLIS_DDOTXF_KERNEL bli_ddotxf_int_var1 // -- dotxaxpyf -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- amax -- #define BLIS_SAMAXV_KERNEL bli_samaxv_opt_var1 #define BLIS_DAMAXV_KERNEL bli_damaxv_opt_var1 // -- addv -- // -- axpyv -- #define BLIS_DAXPYV_KERNEL bli_daxpyv_opt_var10 #define BLIS_SAXPYV_KERNEL bli_saxpyv_opt_var10 // -- copyv -- // -- dotv -- #define BLIS_DDOTV_KERNEL bli_ddotv_opt_var1 #define BLIS_SDOTV_KERNEL bli_sdotv_opt_var1 // -- dotxv -- #define BLIS_SDOTXV_KERNEL bli_sdotxv_unb_var1 #define BLIS_DDOTXV_KERNEL bli_ddotxv_unb_var1 // -- invertv -- // -- scal2v -- // -- scalv -- #define BLIS_SSCALV_KERNEL bli_sscalv_opt_var2 #define BLIS_DSCALV_KERNEL bli_dscalv_opt_var2 // -- setv -- // -- subv -- // -- swapv -- #endif blis-1.1/config/zen2/000077500000000000000000000000001474157777200144545ustar00rootroot00000000000000blis-1.1/config/zen2/bli_cntx_init_zen2.c000066400000000000000000000271351474157777200204130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2020-2022, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_zen2( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_zen2_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, // level-3 sup BLIS_GEMMSUP_RRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_CRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, #if 0 BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, #endif #if 0 // NOTE: This set of kernels is likely broken and therefore disabled. BLIS_GEMMSUP_RRR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_CCR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_RCR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_CRR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_RCC_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, BLIS_GEMMSUP_CCR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, BLIS_GEMMSUP_CCC_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, #endif // packm BLIS_PACKM_MRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_16xk, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_4xk, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_5, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_5, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int10, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int10, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, //swap BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_zen_int8, BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_zen_int8, //copy BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_zen_int, BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_zen_int, //set BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_zen_int, BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_zen_int, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // gemm BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // level-3 sup BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); #if AOCL_BLIS_MULTIINSTANCE bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Initialize sup thresholds with architecture-appropriate values. // s d c z #if 1 bli_blksz_init_easy( &blkszs[ BLIS_MT ], 500, 249, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NT ], 500, 249, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KT ], 500, 249, -1, -1 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MT ], 100000, 100000, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NT ], 100000, 100000, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KT ], 100000, 100000, -1, -1 ); #endif // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR_SUP ], 6, 6, -1, -1, 9, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR_SUP ], 16, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC_SUP ], 168, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC_SUP ], 256, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC_SUP ], 4080, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, // sup thresholds BLIS_MT, &blkszs[ BLIS_MT ], BLIS_MT, BLIS_NT, &blkszs[ BLIS_NT ], BLIS_NT, BLIS_KT, &blkszs[ BLIS_KT ], BLIS_KT, // level-3 sup BLIS_NC_SUP, &blkszs[ BLIS_NC_SUP ], BLIS_NC_SUP, BLIS_KC_SUP, &blkszs[ BLIS_KC_SUP ], BLIS_KC_SUP, BLIS_MC_SUP, &blkszs[ BLIS_MC_SUP ], BLIS_MC_SUP, BLIS_NR_SUP, &blkszs[ BLIS_NR_SUP ], BLIS_NR_SUP, BLIS_MR_SUP, &blkszs[ BLIS_MR_SUP ], BLIS_MR_SUP, BLIS_VA_END ); // ------------------------------------------------------------------------- #if 0 // Initialize the context with the sup handlers. bli_cntx_set_l3_sup_handlers ( cntx, BLIS_GEMM, bli_gemmsup_ref, //BLIS_GEMMT, bli_gemmtsup_ref, BLIS_VA_END ); #endif } blis-1.1/config/zen2/bli_family_zen2.h000066400000000000000000000073631474157777200177030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 // Vanilla BLIS disables AMD's small matrix handling by default. #if 0 #define BLIS_ENABLE_SMALL_MATRIX #define BLIS_ENABLE_SMALL_MATRIX_TRSM // This will select the threshold below which small matrix code will be called. #define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 #define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n) #define BLIS_SMALL_MATRIX_A_THRES_TRSM 128 #define BLIS_SMALL_MATRIX_A_THRES_M_GEMMT 96 #define BLIS_SMALL_MATRIX_A_THRES_N_GEMMT 128 #define BLIS_ENABLE_SMALL_MATRIX_ROME #define BLIS_SMALL_MATRIX_THRES_ROME 400 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 80 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M 40 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M 1000 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N 10 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M 5 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N 130 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME 120 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M 10 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N 1200 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M 30 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N 280 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N 100 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME 110 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N 30 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME 120 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N 50 // When running HPL with pure MPI without DGEMM threading (Single-threaded // BLIS), defining this macro as 1 yields better performance. #define AOCL_BLIS_MULTIINSTANCE 0 #endif blis-1.1/config/zen2/bli_kernel_defs_zen2.h000066400000000000000000000037231474157777200206770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 6 #define BLIS_MR_d 6 #define BLIS_MR_c 3 #define BLIS_MR_z 3 #define BLIS_NR_s 16 #define BLIS_NR_d 8 #define BLIS_NR_c 8 #define BLIS_NR_z 4 //#endif blis-1.1/config/zen2/make_defs.mk000066400000000000000000000075641474157777200167370ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen2 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 -fomit-frame-pointer endif # Flags specific to optimized and reference kernels. # NOTE: The -fomit-frame-pointer option is needed for some kernels because # they make explicit use of the rbp register. CKOPTFLAGS := $(COPTFLAGS) -O3 CROPTFLAGS := $(CKOPTFLAGS) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast ifeq ($(CC_VENDOR),gcc) ifeq ($(GCC_OT_6_1_0),yes) # gcc versions older than 6.1. CVECFLAGS_VER := -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else ifeq ($(GCC_OT_9_1_0),yes) # gcc versions 6.1 or newer, but older than 9.1. CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store else # gcc versions 9.1 or newer. CVECFLAGS_VER := -march=znver2 endif endif else ifeq ($(CC_VENDOR),clang) ifeq ($(CLANG_OT_9_0_0),yes) # clang versions older than 9.0. CVECFLAGS_VER := -march=znver1 else # clang versions 9.0 or newer. CVECFLAGS_VER := -march=znver2 endif else ifeq ($(CC_VENDOR),aocc) ifeq ($(AOCC_OT_2_0_0),yes) # aocc versions older than 2.0. CVECFLAGS_VER := -march=znver1 -mllvm -disable-licm-vrp else # aocc versions 2.0 or newer. CVECFLAGS_VER := -march=znver2 endif else $(error gcc, clang, or aocc is required for this configuration.) endif endif endif CKVECFLAGS += $(CVECFLAGS_VER) CRVECFLAGS += $(CVECFLAGS_VER) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/zen2/make_defs.mk.old000066400000000000000000000073751474157777200175140ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # FLAGS that are specific to the 'zen2' architecture are added here. # FLAGS that are common for all the AMD architectures are present in # config/zen/amd_config.mk. # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen2 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # Include file containing common flags for all AMD architectures. AMD_CONFIG_FILE := amd_config.mk AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen -include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE) ifeq ($(CC_VENDOR),gcc) ifeq ($(GCC_OT_9_1_0),yes) ifeq ($(GCC_OT_6_1_0),yes) # If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the # Bulldozer instruction sets that were omitted from Zen. CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else # If gcc is older than 9.1.0 but at least 6.1.0, then we can use -march=znver1 # as the fallback option. CRVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store CKVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store endif else # If gcc is at least 9.1.0, then we can specify the microarchitecture using # the preferred option. CRVECFLAGS += -march=znver2 CKVECFLAGS += -march=znver2 endif else ifeq ($(CC_VENDOR),clang) ifeq ($(strip $(shell $(CC) -v |&head -1 |grep -c 'AOCC.LLVM.2\|AOCC_2')),1) CKVECFLAGS += -march=znver2 else #if compiling with clang VENDOR_STRING := $(strip $(shell ${CC_VENDOR} --version | egrep -o '[0-9]+\.[0-9]+\.?[0-9]*')) CC_MAJOR := $(shell (echo ${VENDOR_STRING} | cut -d. -f1)) #clang 9.0 or later: ifeq ($(shell test $(CC_MAJOR) -ge 9; echo $$?),0) CKVECFLAGS += -march=znver2 else CKVECFLAGS += -march=znver1 endif # ge 9 endif # AOCC 2 endif # Clang endif # gcc # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/zen3/000077500000000000000000000000001474157777200144555ustar00rootroot00000000000000blis-1.1/config/zen3/bli_cntx_init_zen3.c000066400000000000000000000314321474157777200204100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2020-2022, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_zen3( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_zen3_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels. bli_cntx_set_ukrs ( cntx, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, // gemmsup #if 0 // AMD: This should be enabled in the PR which has added these kernels // Update the context with optimized small/unpacked gemm kernels. BLIS_GEMMSUP_RRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_CRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_zen_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x16n, BLIS_GEMMSUP_RRR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_CRR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_RCR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_CRR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4m, BLIS_GEMMSUP_RCC_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, BLIS_GEMMSUP_CCR_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, BLIS_GEMMSUP_CCC_UKR, BLIS_DCOMPLEX, bli_zgemmsup_rv_zen_asm_3x4n, #else BLIS_GEMMSUP_RRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, BLIS_GEMMSUP_RCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_RCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CRR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, BLIS_GEMMSUP_CRC_UKR, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, BLIS_GEMMSUP_CCR_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_CCC_UKR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, BLIS_GEMMSUP_RRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16m, BLIS_GEMMSUP_RCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_RCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CRR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16m, BLIS_GEMMSUP_CRC_UKR, BLIS_FLOAT, bli_sgemmsup_rd_haswell_asm_6x16n, BLIS_GEMMSUP_CCR_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, BLIS_GEMMSUP_CCC_UKR, BLIS_FLOAT, bli_sgemmsup_rv_haswell_asm_6x16n, #endif // packm #if 0 // AMD: This will be enabled in other PRs. BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_6xk_gen_zen, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_8xk_gen_zen, #else BLIS_PACKM_MRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_FLOAT, bli_spackm_haswell_asm_16xk, BLIS_PACKM_MRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_6xk, BLIS_PACKM_NRXK_KER, BLIS_DOUBLE, bli_dpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_SCOMPLEX, bli_cpackm_haswell_asm_8xk, BLIS_PACKM_MRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_3xk, BLIS_PACKM_NRXK_KER, BLIS_DCOMPLEX, bli_zpackm_haswell_asm_4xk, #endif // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_5, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_5, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int10, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int10, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, // swapv BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_zen_int8, BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_zen_int8, // copyv BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_zen_int, BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_zen_int, // setv BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_zen_int, BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_zen_int, BLIS_VA_END ); // Update the context with storage preferences. bli_cntx_set_ukr_prefs ( cntx, // gemm BLIS_GEMM_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMM_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_L_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMTRSM_U_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, // gemmsup BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_FLOAT, TRUE, BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CRC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DOUBLE, TRUE, #if 0 // AMD: This should be enabled in the PR which has added these kernels // Update the context with optimized small/unpacked gemm kernels. BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_SCOMPLEX, TRUE, BLIS_GEMMSUP_RRR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_RCR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_CRR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_RCC_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_CCR_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, BLIS_GEMMSUP_CCC_UKR_ROW_PREF, BLIS_DCOMPLEX, TRUE, #endif BLIS_VA_END ); // Initialize level-3 blocksize objects with architecture-specific values. // // These are reference block sizes and may be overridden based on // number of threads used at runtime. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MT ], 512, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NT ], 200, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KT ], 240, 220, -1, -1 ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR_SUP ], 6, 6, 3, 3, 9, 9, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR_SUP ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC_SUP ], 144, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_KC_SUP ], 512, 256, 128, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_NC_SUP ], 8160, 4080, 2040, 1020 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( cntx, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, // sup thresholds BLIS_MT, &blkszs[ BLIS_MT ], BLIS_MT, BLIS_NT, &blkszs[ BLIS_NT ], BLIS_NT, BLIS_KT, &blkszs[ BLIS_KT ], BLIS_KT, // gemmsup BLIS_NC_SUP, &blkszs[ BLIS_NC_SUP ], BLIS_NR_SUP, BLIS_KC_SUP, &blkszs[ BLIS_KC_SUP ], BLIS_KR_SUP, BLIS_MC_SUP, &blkszs[ BLIS_MC_SUP ], BLIS_MR_SUP, BLIS_NR_SUP, &blkszs[ BLIS_NR_SUP ], BLIS_NR_SUP, BLIS_MR_SUP, &blkszs[ BLIS_MR_SUP ], BLIS_MR_SUP, BLIS_VA_END ); // ------------------------------------------------------------------------- #if 0 // Initialize the context with the sup handlers. bli_cntx_set_l3_sup_handlers ( cntx, BLIS_GEMM, bli_gemmsup_ref, //BLIS_GEMMT, bli_gemmtsup_ref, BLIS_VA_END ); #endif } blis-1.1/config/zen3/bli_family_zen3.h000066400000000000000000000074641474157777200177070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLI_FAMILY_ZEN3_ #define BLI_FAMILY_ZEN3_ // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. // #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 // To enable framework optimizations for zen3 platform // All zen3 specific code should be included in this macro #define BLIS_CONFIG_ZEN3 // To enable framework optimizations for zen3 platform // All zen3 specific code should be included in this macro #define BLIS_CONFIG_ZEN3 //#define BLIS_ENABLE_SMALL_MATRIX //#define BLIS_ENABLE_SMALL_MATRIX_TRSM // This will select the threshold below which small matrix code will be called. #define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 #define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n) #define BLIS_SMALL_MATRIX_A_THRES_TRSM 128 #define BLIS_SMALL_MATRIX_A_THRES_M_GEMMT 96 #define BLIS_SMALL_MATRIX_A_THRES_N_GEMMT 128 #define BLIS_ENABLE_SMALL_MATRIX_ROME #define BLIS_SMALL_MATRIX_THRES_ROME 400 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 80 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_ROW_PANEL_M 40 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_M 1000 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME_COLUMN_PANEL_N 10 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_ROW_PANEL_M 5 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME_COLUMN_PANEL_N 130 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME 120 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_M 10 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_ROW_PANEL_N 1200 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_M 30 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_SQUARE_N 280 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALTB_ROME_COLUMN_PANEL_N 100 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME 110 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XALB_ROME_COL_PANEL_N 30 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME 120 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUTB_ROME_COL_PANEL_N 50 #endif blis-1.1/config/zen3/bli_kernel_defs_zen3.h000066400000000000000000000037231474157777200207010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2022, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_KERNEL_DEFS_H //#define BLIS_KERNEL_DEFS_H // -- REGISTER BLOCK SIZES (FOR REFERENCE KERNELS) ---------------------------- #define BLIS_MR_s 6 #define BLIS_MR_d 6 #define BLIS_MR_c 3 #define BLIS_MR_z 3 #define BLIS_NR_s 16 #define BLIS_NR_d 8 #define BLIS_NR_c 8 #define BLIS_NR_z 4 //#endif blis-1.1/config/zen3/make_defs.mk000066400000000000000000000110421474157777200167220ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen3 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized and reference kernels. # NOTE: The -fomit-frame-pointer option is needed for some kernels because # they make explicit use of the rbp register. CKOPTFLAGS := $(COPTFLAGS) -fomit-frame-pointer CROPTFLAGS := $(CKOPTFLAGS) CKVECFLAGS := -mavx2 -mfma CRVECFLAGS := $(CKVECFLAGS) ifeq ($(CC_VENDOR),gcc) ifeq ($(GCC_OT_9_1_0),yes) # gcc versions older than 9.1. CVECFLAGS_VER := -march=znver1 -mno-avx256-split-unaligned-store else ifeq ($(GCC_OT_10_3_0),yes) # gcc versions 9.1 or newer, but older than 10.3. CVECFLAGS_VER := -march=znver2 else # gcc versions 10.1 or newer. CVECFLAGS_VER := -march=znver3 endif endif CKVECFLAGS += -mfpmath=sse CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) ifeq ($(CLANG_OT_9_0_0),yes) # clang versions older than 9.0. CVECFLAGS_VER := -march=znver1 else ifeq ($(CLANG_OT_12_0_0),yes) # clang versions 9.0 or newer, but older than 12.0. CVECFLAGS_VER := -march=znver2 else ifeq ($(OS_NAME),Darwin) # clang version 12.0 on OSX lacks znver3 support CVECFLAGS_VER := -march=znver2 else # clang versions 12.0 or newer. CVECFLAGS_VER := -march=znver3 endif endif endif CKVECFLAGS += -mfpmath=sse CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),aocc) ifeq ($(AOCC_OT_2_0_0),yes) # aocc versions older than 2.0. CVECFLAGS_VER := -march=znver1 else ifeq ($(AOCC_OT_3_0_0),yes) # aocc versions 2.0 or newer, but older than 3.0. CVECFLAGS_VER := -march=znver2 else # aocc versions 3.0 or newer. CVECFLAGS_VER := -march=znver3 endif endif CKVECFLAGS += -mfpmath=sse CRVECFLAGS += -funsafe-math-optimizations -ffp-contract=fast ifeq ($(CC_VENDOR),nvc) CVECFLAGS_VER := -march=znver3 CRVECFLAGS += -fast else $(error gcc, clang, nvc or aocc is required for this configuration.) endif endif endif endif CKVECFLAGS += $(CVECFLAGS_VER) CRVECFLAGS += $(CVECFLAGS_VER) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config/zen3/make_defs.mk.old000066400000000000000000000116351474157777200175070ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # FLAGS that are specific to the 'zen3' architecture are added here. # FLAGS that are common for all the AMD architectures are present in # config/zen/amd_config.mk. # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen3 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := -fPIC CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else #frame pointers are needed to execution tracing ifeq ($(ETRACE_ENABLE),1) COPTFLAGS := -O3 else COPTFLAGS := -O3 -fomit-frame-pointer endif endif # # --- Enable ETRACE across the library if enabled ETRACE_ENABLE=[0,1] ----------------------- # ifeq ($(ETRACE_ENABLE),1) CDBGFLAGS += -pg -finstrument-functions -DAOCL_DTL_AUTO_TRACE_ENABLE LDFLAGS += -ldl endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) GCC_VERSION := $(strip $(shell $(CC) -dumpversion | cut -d. -f1)) #gcc or clang version must be atleast 4.0 # gcc 9.0 or later: ifeq ($(shell test $(GCC_VERSION) -ge 9; echo $$?),0) CKVECFLAGS += -march=znver2 else # If gcc is older than 9.1.0 but at least 6.1.0, then we can use -march=znver1 # as the fallback option. CRVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store CKVECFLAGS += -march=znver1 -mno-avx256-split-unaligned-store endif else ifeq ($(CC_VENDOR),clang) # AOCC clang has various formats for the version line # AOCC.LLVM.2.0.0.B191.2019_07_19 clang version 8.0.0 (CLANG: Jenkins AOCC_2_0_0-Build#191) (based on LLVM AOCC.LLVM.2.0.0.B191.2019_07_19) # AOCC.LLVM.2.1.0.B1030.2019_11_12 clang version 9.0.0 (CLANG: Build#1030) (based on LLVM AOCC.LLVM.2.1.0.B1030.2019_11_12) # AMD clang version 10.0.0 (CLANG: AOCC_2.2.0-Build#93 2020_06_25) (based on LLVM Mirror.Version.10.0.0) # AMD clang version 11.0.0 (CLANG: AOCC_2.3.0-Build#85 2020_11_10) (based on LLVM Mirror.Version.11.0.0) # AMD clang version 12.0.0 (CLANG: AOCC_3.0.0-Build#2 2020_11_05) (based on LLVM Mirror.Version.12.0.0) # For our prupose we just want to know if it version 2x or 3x # for version 3x we will enable znver3 ifeq ($(strip $(shell $(CC) -v |&head -1 |grep -c 'AOCC_3')),1) CKVECFLAGS += -march=znver3 else # for version 2x we will enable znver2 ifeq ($(strip $(shell $(CC) -v |&head -1 |grep -c 'AOCC.LLVM.2\|AOCC_2')),1) CKVECFLAGS += -march=znver2 else #if compiling with clang VENDOR_STRING := $(strip $(shell ${CC_VENDOR} --version | egrep -o '[0-9]+\.[0-9]+\.?[0-9]*')) CC_MAJOR := $(shell (echo ${VENDOR_STRING} | cut -d. -f1)) #clang 9.0 or later: ifeq ($(shell test $(CC_MAJOR) -ge 9; echo $$?),0) CKVECFLAGS += -march=znver2 else CKVECFLAGS += -march=znver1 endif # ge 9 endif # aocc 2 endif # aocc 3 endif # clang endif # gcc # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) CRVECFLAGS := $(CKVECFLAGS) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-1.1/config_registry000066400000000000000000000033031474157777200154500ustar00rootroot00000000000000# # config_registry # # Please refer to the BLIS wiki on configurations for information on the # syntax and semantics of this file [1]. # # [1] https://github.com/flame/blis/blob/master/docs/ConfigurationHowTo.md # # Processor families. x86_64: intel64 amd64 amd64_legacy intel64: skx knl haswell sandybridge penryn generic amd64_legacy: excavator steamroller piledriver bulldozer generic amd64: zen3 zen2 zen generic arm64: armsve firestorm thunderx2 cortexa57 cortexa53 generic arm32: cortexa15 cortexa9 generic power: power10 power9 generic # Intel architectures. skx: skx/skx/haswell/zen knl: knl/knl/haswell/zen haswell: haswell/haswell/zen sandybridge: sandybridge penryn: penryn # AMD architectures. zen3: zen3/zen3/zen2/zen/haswell zen2: zen2/zen2/zen/haswell zen: zen/zen/haswell excavator: excavator/piledriver steamroller: steamroller/piledriver piledriver: piledriver bulldozer: bulldozer # ARM architectures. armsve: armsve/armsve a64fx: a64fx/armsve # ARM Neon64 (4 pipes x 128b) architectures. altramax: altramax/armv8a altra: altra/armv8a firestorm: firestorm/armv8a # ARM (2 pipes x 128b) architectures. thunderx2: thunderx2/armv8a cortexa57: cortexa57/armv8a cortexa53: cortexa53/armv8a # ARM Vintage architectures. cortexa15: cortexa15/armv7a cortexa9: cortexa9/armv7a # IBM architectures. power10: power10 power9: power9 bgq: bgq # RISC-V architectures. rv32i: rv32i/rvi rv64i: rv64i/rvi rv32iv: rv32iv/rviv rv64iv: rv64iv/rviv # SiFive architectures. sifive_x280: sifive_x280 # Generic architectures. generic: generic blis-1.1/configure000077500000000000000000004504711474157777200142530ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2020-2022, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # shellcheck disable=2001,2249,2034,2154,2181,2312,2250,2292 # # -- Helper functions ---------------------------------------------------------- # print_usage() { # Use the version string in the 'version' file since we don't have # the patched version string yet. if [ -z "${version}" ]; then version=$(<"${version_filepath}") fi # Echo usage info. cat <